{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 导入所需要的包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 206,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np    # Python中进行数值计算的库\n",
    "import pandas as pd    # Python中进行数据处理的库\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore') #  忽略弹出的warnings"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 读数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 207,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv('Lending_Club.csv',encoding=\"ISO-8859-1\")#这里注意，一定要把encoding设置为ISO-8859-1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 208,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(130772, 145)"
      ]
     },
     "execution_count": 208,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape #查看数据量大小"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 209,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>member_id</th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>sub_grade</th>\n",
       "      <th>...</th>\n",
       "      <th>hardship_payoff_balance_amount</th>\n",
       "      <th>hardship_last_payment_amount</th>\n",
       "      <th>disbursement_method</th>\n",
       "      <th>debt_settlement_flag</th>\n",
       "      <th>debt_settlement_flag_date</th>\n",
       "      <th>settlement_status</th>\n",
       "      <th>settlement_date</th>\n",
       "      <th>settlement_amount</th>\n",
       "      <th>settlement_percentage</th>\n",
       "      <th>settlement_term</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10000</td>\n",
       "      <td>10000</td>\n",
       "      <td>10000</td>\n",
       "      <td>36 months</td>\n",
       "      <td>0.2039</td>\n",
       "      <td>373.63</td>\n",
       "      <td>D</td>\n",
       "      <td>D4</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8000</td>\n",
       "      <td>8000</td>\n",
       "      <td>8000</td>\n",
       "      <td>36 months</td>\n",
       "      <td>0.0683</td>\n",
       "      <td>246.40</td>\n",
       "      <td>A</td>\n",
       "      <td>A3</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20000</td>\n",
       "      <td>20000</td>\n",
       "      <td>20000</td>\n",
       "      <td>60 months</td>\n",
       "      <td>0.0683</td>\n",
       "      <td>394.43</td>\n",
       "      <td>A</td>\n",
       "      <td>A3</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>DirectPay</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16000</td>\n",
       "      <td>16000</td>\n",
       "      <td>16000</td>\n",
       "      <td>36 months</td>\n",
       "      <td>0.1403</td>\n",
       "      <td>547.08</td>\n",
       "      <td>C</td>\n",
       "      <td>C2</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4 rows × 145 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  member_id  loan_amnt  funded_amnt  funded_amnt_inv        term  \\\n",
       "0 NaN        NaN      10000        10000            10000   36 months   \n",
       "1 NaN        NaN       8000         8000             8000   36 months   \n",
       "2 NaN        NaN      20000        20000            20000   60 months   \n",
       "3 NaN        NaN      16000        16000            16000   36 months   \n",
       "\n",
       "   int_rate  installment grade sub_grade  ... hardship_payoff_balance_amount  \\\n",
       "0    0.2039       373.63     D        D4  ...                            NaN   \n",
       "1    0.0683       246.40     A        A3  ...                            NaN   \n",
       "2    0.0683       394.43     A        A3  ...                            NaN   \n",
       "3    0.1403       547.08     C        C2  ...                            NaN   \n",
       "\n",
       "  hardship_last_payment_amount disbursement_method  debt_settlement_flag  \\\n",
       "0                          NaN                Cash                     N   \n",
       "1                          NaN                Cash                     N   \n",
       "2                          NaN           DirectPay                     N   \n",
       "3                          NaN                Cash                     N   \n",
       "\n",
       "  debt_settlement_flag_date settlement_status settlement_date  \\\n",
       "0                       NaN               NaN             NaN   \n",
       "1                       NaN               NaN             NaN   \n",
       "2                       NaN               NaN             NaN   \n",
       "3                       NaN               NaN             NaN   \n",
       "\n",
       "  settlement_amount  settlement_percentage  settlement_term  \n",
       "0               NaN                    NaN              NaN  \n",
       "1               NaN                    NaN              NaN  \n",
       "2               NaN                    NaN              NaN  \n",
       "3               NaN                    NaN              NaN  \n",
       "\n",
       "[4 rows x 145 columns]"
      ]
     },
     "execution_count": 209,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#查看部分数据\n",
    "data.head(4)#可以看出有很多确实值"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 查看数据集缺失情况"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 210,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "      <th>miss_num</th>\n",
       "      <th>miss_percentage</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>settlement_term</td>\n",
       "      <td>130772</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>payment_plan_start_date</td>\n",
       "      <td>130772</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>member_id</td>\n",
       "      <td>130772</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>url</td>\n",
       "      <td>130772</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>desc</td>\n",
       "      <td>130772</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>settlement_percentage</td>\n",
       "      <td>130772</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>hardship_type</td>\n",
       "      <td>130772</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>hardship_reason</td>\n",
       "      <td>130772</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>hardship_status</td>\n",
       "      <td>130772</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>deferral_term</td>\n",
       "      <td>130772</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   feature  miss_num  miss_percentage\n",
       "0          settlement_term    130772              1.0\n",
       "1  payment_plan_start_date    130772              1.0\n",
       "2                member_id    130772              1.0\n",
       "3                      url    130772              1.0\n",
       "4                     desc    130772              1.0\n",
       "5    settlement_percentage    130772              1.0\n",
       "6            hardship_type    130772              1.0\n",
       "7          hardship_reason    130772              1.0\n",
       "8          hardship_status    130772              1.0\n",
       "9            deferral_term    130772              1.0"
      ]
     },
     "execution_count": 210,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "### 查看数据集缺失情况\n",
    "missingDf = data.isnull().sum().sort_values(ascending=False).reset_index()#常用的数据缺失值查看方法，ascending=False是降序排列\n",
    "missingDf.columns = ['feature', 'miss_num']    \n",
    "missingDf['miss_percentage'] = missingDf['miss_num'] / data.shape[0]    # 缺失值比例\n",
    "missingDf.head(10)    # 缺失值最多的前十列特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 211,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "原始数据集144列特征中:\n",
      "\n",
      "有60列特征含有缺失值\n",
      "有43列的特征缺失值比例在30%以上\n"
     ]
    }
   ],
   "source": [
    "print(\"原始数据集144列特征中:\\n\" )\n",
    "print(\"有%d列特征含有缺失值\" % missingDf[missingDf['miss_num'] > 0].shape[0])\n",
    "print(\"有%d列的特征缺失值比例在30%%以上\" % missingDf[missingDf['miss_percentage'] > 0.3].shape[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 缺失值处理\n",
    "1. 直接删除缺失值过多的特征或样本\n",
    "2. 对缺失值进行填充，一般使用平均值或者最大最小值\n",
    "3. 用其他未缺失的样本对缺失的样本特征进行预测"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1、直接删除"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 212,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "去除掉缺失值占比大于0.3的特征之后,当前还剩102列特征\n"
     ]
    }
   ],
   "source": [
    "thr = (1 - 0.3) * data.shape[0]  # 可以根据实际情况设定不同阈值, 此处设为30%, 则非缺失值的数量大于70%\n",
    "data = data.dropna(thresh=thr, axis=1)     #若某一列数据缺失的数量超过阀值就会被删除\n",
    "print(\"去除掉缺失值占比大于0.3的特征之后,当前还剩%d列特征\" %(data.shape[1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 213,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0     63753\n",
      "1     41794\n",
      "2     12719\n",
      "3      6714\n",
      "4      3483\n",
      "5      1439\n",
      "6       551\n",
      "7       205\n",
      "8        65\n",
      "9        21\n",
      "10       18\n",
      "11        9\n",
      "13        1\n",
      "Name: row_missing, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "### 缺失特征过多的样本可以考虑直接整行删除\n",
    "data['row_missing'] = data.apply(lambda x: x.isnull().sum(), axis=1).to_frame()     # 计算每条样本的缺失值列数\n",
    "print(data['row_missing'].value_counts())    # 观察所有样本行的缺失值情况"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "缺失列数在8列及以上的样本有114条（65+21+18+9+1），缺失值较多且样本数较少，因此可以考虑将这些样本直接删除。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 214,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "去除掉缺失特征大于35列的样本之后，当前还剩130658行数据\n"
     ]
    }
   ],
   "source": [
    "data = data[data['row_missing'] < 8]       # 删掉缺失值大于等于8列的样本\n",
    "data.drop(['row_missing'], axis=1, inplace=True)    # 删掉刚刚加入原始数据集的统计列\n",
    "print(\"去除掉缺失特征大于35列的样本之后，当前还剩%d行数据\" % data.shape[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2.使用固定值进行填充"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 215,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>last_pymnt_d</td>\n",
       "      <td>32834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>il_util</td>\n",
       "      <td>21951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>mths_since_recent_inq</td>\n",
       "      <td>15514</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>emp_title</td>\n",
       "      <td>10578</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>emp_length</td>\n",
       "      <td>10385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>chargeoff_within_12_mths</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>delinq_amnt</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>mo_sin_old_rev_tl_op</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100</th>\n",
       "      <td>mo_sin_rcnt_rev_tl_op</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101</th>\n",
       "      <td>loan_amnt</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>102 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                        index      0\n",
       "0                last_pymnt_d  32834\n",
       "1                     il_util  21951\n",
       "2       mths_since_recent_inq  15514\n",
       "3                   emp_title  10578\n",
       "4                  emp_length  10385\n",
       "..                        ...    ...\n",
       "97   chargeoff_within_12_mths      0\n",
       "98                delinq_amnt      0\n",
       "99       mo_sin_old_rev_tl_op      0\n",
       "100     mo_sin_rcnt_rev_tl_op      0\n",
       "101                 loan_amnt      0\n",
       "\n",
       "[102 rows x 2 columns]"
      ]
     },
     "execution_count": 215,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isnull().sum().sort_values(ascending=False).reset_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "以emp_length这列特征为例，它代表贷款客户的工作年限，统计可知其中有10385条缺失值，原始数据集中用'n/a'表示。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "原始数据集的emp_length特征取值情况：\n",
      " 10+ years    43802\n",
      "2 years      12290\n",
      "3 years      10865\n",
      "< 1 year      8894\n",
      "1 year        8635\n",
      "4 years       8631\n",
      "5 years       8199\n",
      "6 years       6088\n",
      "7 years       4955\n",
      "8 years       4334\n",
      "9 years       3580\n",
      "Name: emp_length, dtype: int64\n",
      "\n",
      "使用固定值填充后emp_length特征取值情况：\n",
      " 10+ years    43802\n",
      "2 years      12290\n",
      "3 years      10865\n",
      "< 1 year      8894\n",
      "1 year        8635\n",
      "4 years       8631\n",
      "5 years       8199\n",
      "6 years       6088\n",
      "7 years       4955\n",
      "8 years       4334\n",
      "9 years       3580\n",
      "Name: emp_length, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "### 特征emp_length中的缺失值可以尝试用特殊字符表示，代表独立的一类特征值。\n",
    "print(\"原始数据集的emp_length特征取值情况：\\n\", data['emp_length'].value_counts())\n",
    "data.loc[data['emp_length']=='', 'emp_ln/aength'] = 'Unknown'    # 将emp_length特征列中的缺失值替换为特殊字符'Unknown'\n",
    "print(\"\\n使用固定值填充后emp_length特征取值情况：\\n\", data['emp_length'].value_counts())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "数值特征通常采用其他样本的均值或中位数进行填充。以dti为例，它代表贷款人每月还款占其收入的比例（去掉百分号后的值），观察其统计信息。可知该列最大值为999.000000，很明显这是一个极大的异常值，如果对该列采用均值进行填充，会受到极端值的影响，故此处选用中位数进行填充。当然，这种简单的方法还有提升的空间，例如像dti这种特征可以结合贷款人的收入水平（annual_inc）进行先分组后填充。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 217,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dti特征列有317个缺失值\n",
      "\n",
      "dti特征列的统计信息:\n",
      " count    130341.000000\n",
      "mean         19.296477\n",
      "std          19.480767\n",
      "min           0.000000\n",
      "25%          11.100000\n",
      "50%          17.390000\n",
      "75%          24.730000\n",
      "max         999.000000\n",
      "Name: dti, dtype: float64\n",
      "\n",
      "填充中位数后dti特征列的统计信息:\n",
      " count    130658.000000\n",
      "mean         19.291851\n",
      "std          19.457347\n",
      "min           0.000000\n",
      "25%          11.110000\n",
      "50%          17.390000\n",
      "75%          24.717500\n",
      "max         999.000000\n",
      "Name: dti, dtype: float64\n",
      "\n",
      "此时dti特征列有0个缺失值\n",
      "\n"
     ]
    }
   ],
   "source": [
    "### dti属性是数值特征，可以使用均值或中位数进行填充\n",
    "print(\"dti特征列有%d个缺失值\\n\" % data['dti'].isnull().sum())\n",
    "print(\"dti特征列的统计信息:\\n\", data['dti'].describe())\n",
    "data['dti'].fillna(data['dti'].median(), inplace=True)    # 这里median()采用中位数填充,也可以使用均值mean()\n",
    "print(\"\\n填充中位数后dti特征列的统计信息:\\n\", data['dti'].describe())\n",
    "print(\"\\n此时dti特征列有%d个缺失值\\n\" % data['dti'].isnull().sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3、建模预测缺失值并填充"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 218,
   "metadata": {},
   "outputs": [],
   "source": [
    "### 以revol_util(信用账户的使用率)特征为例，导入sklearn的随机森林算法预测缺失值\n",
    "from sklearn.ensemble import RandomForestRegressor    \n",
    "\n",
    "rfDf = data.copy()\n",
    "# 用revol_util特征值非空的样本构建训练集，revol_util特征值缺失的样本构建测试集\n",
    "rfDf_train = rfDf.loc[rfDf['revol_util'].notnull()]\n",
    "rfDf_test = rfDf.loc[rfDf['revol_util'].isnull()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 219,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>loan_amnt</td>\n",
       "      <td>130528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>funded_amnt</td>\n",
       "      <td>130528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>funded_amnt_inv</td>\n",
       "      <td>130528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>term</td>\n",
       "      <td>130528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>int_rate</td>\n",
       "      <td>130528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>total_il_high_credit_limit</td>\n",
       "      <td>130528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>hardship_flag</td>\n",
       "      <td>130528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100</th>\n",
       "      <td>disbursement_method</td>\n",
       "      <td>130528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101</th>\n",
       "      <td>debt_settlement_flag</td>\n",
       "      <td>130528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102</th>\n",
       "      <td>emp_ln/aength</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>103 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                          index       0\n",
       "0                     loan_amnt  130528\n",
       "1                   funded_amnt  130528\n",
       "2               funded_amnt_inv  130528\n",
       "3                          term  130528\n",
       "4                      int_rate  130528\n",
       "..                          ...     ...\n",
       "98   total_il_high_credit_limit  130528\n",
       "99                hardship_flag  130528\n",
       "100         disbursement_method  130528\n",
       "101        debt_settlement_flag  130528\n",
       "102               emp_ln/aength       0\n",
       "\n",
       "[103 rows x 2 columns]"
      ]
     },
     "execution_count": 219,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rfDf_train.notnull().sum().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 220,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "loan_amnt                       int64\n",
       "funded_amnt                     int64\n",
       "funded_amnt_inv                 int64\n",
       "term                           object\n",
       "int_rate                      float64\n",
       "                               ...   \n",
       "total_il_high_credit_limit      int64\n",
       "hardship_flag                  object\n",
       "disbursement_method            object\n",
       "debt_settlement_flag           object\n",
       "emp_ln/aength                  object\n",
       "Length: 103, dtype: object"
      ]
     },
     "execution_count": 220,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 221,
   "metadata": {},
   "outputs": [],
   "source": [
    "col = ['loan_amnt', 'int_rate', 'installment', 'revol_bal', 'collection_recovery_fee']    # 原始数据集中的无缺失数值特征\n",
    "# 划分训练数据和标签（label）\n",
    "X = rfDf_train[col]\n",
    "y = rfDf_train['revol_util']\n",
    "# 训练过程\n",
    "rf = RandomForestRegressor(n_estimators=100,n_jobs=-1)    # 这里重在理解过程，因此仅简单选取部分参数\n",
    "rf.fit(X, y)\n",
    "# 预测过程\n",
    "pred = rf.predict(rfDf_test[col])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 222,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "此时的revol_util特征统计指标:\n",
      "\n",
      "count    130658.000000\n",
      "mean          0.433896\n",
      "std           0.250182\n",
      "min           0.000000\n",
      "25%           0.236000\n",
      "50%           0.416000\n",
      "75%           0.620000\n",
      "max           1.244000\n",
      "Name: revol_util, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "rfDf.loc[(rfDf['revol_util'].isnull()), 'revol_util'] = pred    # 填补缺失值\n",
    "print(\"此时的revol_util特征统计指标:\\n\")\n",
    "print(rfDf['revol_util'].describe()) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 异常值处理"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "异常值是数据集中存在的非正常的值，也称为离群点。异常值有可能是不正确的“脏数据”，也可能是正确的异常数据，需要具体问题具体分析。例如：人的身高100m、人的体重60g是属于不正确的“脏数据”；而个别贷款人的年收入远高于其他人，则可能是真实存在的正确异常数据。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "异常值的处理：异常值通常需要进行处理，否则在某些场景中会导致预测结果受到负面影响。处理方法通常有以下几种：\n",
    "\n",
    "1. 直接删除异常样本：当异常值数量不多且易于判断时，可以直接删除。\n",
    "2. 用均值替代异常样本\n",
    "3. 视为缺失值：某些情况下，可以将异常值当成缺失值处理，利用缺失值的填充方法处理异常。\n",
    "\n",
    "对异常值的不同处理方法，下面我们采用Lending Club贷款数据集中的数据进行举例说明。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1、异常值处理1--简单统计分析 & 删除异常值"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "以Lending Club贷款数据集中的revol_util特征为例，它表示信用账户的使用率。该数据集中这列特征以小数形式存储（例如0.416表示41.6%），因此容易理解这列特征的值不应超过1（即100%）。观察到异常样本有197条，占总样本的比例很小，可以将这些样本直接删除。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 223,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "count    130658.000000\n",
      "mean          0.433896\n",
      "std           0.250182\n",
      "min           0.000000\n",
      "25%           0.236000\n",
      "50%           0.416000\n",
      "75%           0.620000\n",
      "max           1.244000\n",
      "Name: revol_util, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(rfDf['revol_util'].describe())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 224,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "revol_util特征列的异常样本数为:  197\n"
     ]
    }
   ],
   "source": [
    "print(\"revol_util特征列的异常样本数为: \", data[data['revol_util'] > 1].shape[0])\n",
    "data.drop(data[data['revol_util'] > 1].index, inplace = True)    # 根据索引删除样本"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2、异常值处理2--3σ原则 & 均值替代异常样本"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "以dti特征为例，它表示贷款人每月还款占其收入的比例（去掉百分号后的值），容易理解这列特征的值不应超过100，将超过100的值视为无意义的异常值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 225,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3dfZQcV3nn8e+vu2dGlvyKPGaNZFtirWQRJkA8kUmAnCSOHTkbENnYBzlscHa9q4TEZ7PJcnbtk8UhDjm72peYsDgEg50YAbGJiTezIGMChuMkhygagcGWbYVB2HgQYAkLoRdL/fbsH1U96ml1T9dIMxLq+/sc95mqW7dqqqatfvo+t+peRQRmZpae0qk+ATMzOzUcAMzMEuUAYGaWKAcAM7NEOQCYmSXKAcDMLFGFAoCktZJ2SJqUdHOX7SOS7su3b5G0omP7xZIOSHp70WOamdnC6hsAJJWBO4BrgNXA9ZJWd1S7EdgbEZcCtwMbO7bfDjw4x2OamdkCqhSoswaYjIidAJLuBdYBT7TVWQe8M1++H3ivJEVESHoTsBM4OMdjHuP888+PFStWFDhlMzNr2bZt256IGO0sLxIAlgHPtq1PAVf0qhMRdUn7gKWSXgD+C3AV8PZu9Wc55jFWrFjBxMREgVM2M7MWSc90Ky/SB6AuZZ3jR/Sq8/vA7RFx4DiOmVWUNkiakDSxe/fuvidrZmbFFGkBTAEXta0vB3b1qDMlqQKcAzxP9q3+Wkn/AzgXaEo6DGwrcEwAIuJO4E6AsbExD1xkZjZPigSArcAqSSuBbwLrgV/uqDMO3AB8AbgWeDiyUeZe36og6Z3AgYh4bx4k+h3TzMwWUN8AkOf0bwIeAsrA3RGxXdJtwEREjAN3AZskTZJ9819/PMc8wWsxM7M50Ok0HPTY2Fi4E9jMbG4kbYuIsc5yPwlsZpYoBwAzs0Q5AJiZJSr5APB/PvtV/v2H3K9gZulJPgA89e39PLHr+6f6NMzMTrrkA0C10aTWaJ7q0zAzO+mSDwA1BwAzS5QDQKNJvXH6PAthZjZfkg8A1XqTqlsAZpYgB4BGUG+6BWBm6Uk+ANTqTRrNoOEgYGaJcQDI0z/uCDaz1CQfAFr5f6eBzCw1yQeAWr0546eZWSqSDwDV/BZQp4DMLDXJB4DpPgCngMwsMckHgKpTQGaWqOQDgO8CMrNUFQoAktZK2iFpUtLNXbaPSLov375F0oq8fI2kR/PXlyX9Yts+T0t6LN92SsZjbjaPPgRW83AQZpaYvpPCSyoDdwBXAVPAVknjEfFEW7Ubgb0Rcamk9cBG4M3A48BYPgn8hcCXJf2/iKjn+/10ROyZzwuai1rz6Ld+twDMLDVFWgBrgMmI2BkRVeBeYF1HnXXAPfny/cCVkhQRh9o+7BcBP1Bfs6t1BwAzS1eRALAMeLZtfSov61on/8DfBywFkHSFpO3AY8CvtwWEAD4taZukDcd/CcevPe3jFJCZpaZvCghQl7LOT8uedSJiC/BySS8D7pH0YEQcBl4bEbskXQD8jaSnIuKRY355Fhw2AFx88cUFTre49m/9bgGYWWqKtACmgIva1pcDu3rVkVQBzgGeb68QEU8CB4HL8vVd+c/ngAfIUk3HiIg7I2IsIsZGR0cLnG5xTgGZWcqKBICtwCpJKyUNA+uB8Y4648AN+fK1wMMREfk+FQBJlwA/DDwtaYmks/LyJcDVZB3GJ1V1RgvAKSAzS0vfFFB+B89NwENAGbg7IrZLug2YiIhx4C5gk6RJsm/+6/PdXwfcLKkGNIHfiIg9kl4KPCCpdQ4fjYhPzffF9eMUkJmlrEgfABGxGdjcUXZr2/Jh4Lou+20CNnUp3wm8cq4nO99q9fZOYAcAM0tL0k8CVxuN6WXPC2xmqUk7ALS1ADwvsJmlJukA4D4AM0uZA0DOKSAzS03SAaD9OQCngMwsNWkHAKeAzCxh
SQeA9oe/nAIys9QkHgDcAjCzdCUdAGaOBeQWgJmlJekA0PrWP1SWWwBmlpykA0CrE3jxcMUBwMySk3QAaI0FtHi47BSQmSUn6QBQbTQol8RIpeQWgJklJ+kAUGsEw+USQ2UHADNLT9IBoFpvMlRWHgCcAjKztCQdAGqNJsOVku8CMrMkJR0AshaAU0BmlqakA8DRFkDJQ0GYWXIKBQBJayXtkDQp6eYu20ck3Zdv3yJpRV6+RtKj+evLkn6x6DFPhlojGCqXqJTl0UDNLDl9A4CkMnAHcA2wGrhe0uqOajcCeyPiUuB2YGNe/jgwFhGvAtYC75dUKXjMBVdtZCmgYaeAzCxBRVoAa4DJiNgZEVXgXmBdR511wD358v3AlZIUEYciop6XLwJaeZYix1xw1XqT4fwuIKeAzCw1RQLAMuDZtvWpvKxrnfwDfx+wFEDSFZK2A48Bv55vL3LMBdfqA6j4LiAzS1CRAKAuZZ1fl3vWiYgtEfFy4MeAWyQtKnjM7MDSBkkTkiZ2795d4HSLq7WlgNwHYGapKRIApoCL2taXA7t61ZFUAc4Bnm+vEBFPAgeBywoes7XfnRExFhFjo6OjBU63uGreCewUkJmlqEgA2AqskrRS0jCwHhjvqDMO3JAvXws8HBGR71MBkHQJ8MPA0wWPueBazwE4BWRmKar0qxARdUk3AQ8BZeDuiNgu6TZgIiLGgbuATZImyb75r893fx1ws6Qa0AR+IyL2AHQ75jxfW1+1RpOR/DkAp4DMLDV9AwBARGwGNneU3dq2fBi4rst+m4BNRY95smV9AGKoLKeAzCw5aT8J7KEgzCxhSQeAaqPJUGsoiGYQ4VaAmaUj7QBQb+bzAWR3pXpIaDNLSdIBoNaI6cHgsnWngcwsHYkHgKMTwrTWzcxSkWwAaDaDerP1IJhTQGaWnmQDQOu+f6eAzCxVyQaA1od9a1L49jIzsxQkHACydE9rKIj2MjOzFCQbAKr17Nt+azRQcAvAzNKSbACodekD8HAQZpaSZANAqxN4qKzpFJAHhDOzlCQbANo7gZ0CMrMUJRsA2vsAhipOAZlZepINAO19AJWSZpSZmaUg2QBQrR+9DbTVCew+ADNLSbIB4GgLQAw7BWRmCUo2ALT3ATgFZGYpKhQAJK2VtEPSpKSbu2wfkXRfvn2LpBV5+VWStkl6LP/5M237fD4/5qP564L5uqgiuj0H4BSQmaWk75zAksrAHcBVwBSwVdJ4RDzRVu1GYG9EXCppPbAReDOwB3hDROySdBnZJPDL2vZ7S0RMzNO1zMnR5wBKTgGZWZKKtADWAJMRsTMiqsC9wLqOOuuAe/Ll+4ErJSkivhQRu/Ly7cAiSSPzceInqjXuz7BTQGaWqCIBYBnwbNv6FDO/xc+oExF1YB+wtKPOLwFfiogjbWV/lqd/3iFJczrzE9TtOQAHADNLSZEA0O2DuTNXMmsdSS8nSwv9Wtv2t0TEK4DX569f6frLpQ2SJiRN7N69u8DpFtPeB3D0SWCngMwsHUUCwBRwUdv6cmBXrzqSKsA5wPP5+nLgAeCtEfG11g4R8c38537go2SppmNExJ0RMRYRY6Ojo0WuqZBa+1hATgGZWYKKBICtwCpJKyUNA+uB8Y4648AN+fK1wMMREZLOBT4J3BIRf9+qLKki6fx8eQj4BeDxE7uUuWnvBC6XhOQAYGZp6RsA8pz+TWR38DwJfCwitku6TdIb82p3AUslTQK/A7RuFb0JuBR4R8ftniPAQ5K+AjwKfBP4wHxeWD+tPoDhcgkpmxjeKSAzS0nf20ABImIzsLmj7Na25cPAdV32exfwrh6Hvbz4ac6/WqNJpSRKefpnqCS3AMwsKck+CVxrxPQDYABDlZIDgJklJdkAUK03GSofvXnJKSAzS026AaDRnH4CGJwCMrP0JBsAavXm9P3/4BSQmaUn3QDQaE4/AQxZCshjAZlZSpINANVGc0YncKUkjwZqZklJNwDUZ94FNOwU
kJklJtkAUOvoBK6U5BSQmSUl7QDQcRuoU0BmlpJkA0D2HIBTQGaWrmQDQK1LJ7BTQGaWkmQDQLURMx8EK7sFYGZpSTYAZH0AfhDMzNKVbAA4ZiygkjwWkJklJdkA0NkH4BSQmaUm6QAwow+g4tFAzSwtyQaAzttAPRqomaUm3QDQ2QJwCsjMElMoAEhaK2mHpElJN3fZPiLpvnz7Fkkr8vKrJG2T9Fj+82fa9rk8L5+U9B5J6jzuQspmBGvrBK54NFAzS0vfACCpDNwBXAOsBq6XtLqj2o3A3oi4FLgd2JiX7wHeEBGvAG4ANrXt8z5gA7Aqf609geuYk0YzaDSD4XJ5umwoHw00wkHAzNJQpAWwBpiMiJ0RUQXuBdZ11FkH3JMv3w9cKUkR8aWI2JWXbwcW5a2FC4GzI+ILkX3ifgh40wlfTUGtVE+lYywggHrTAcDM0lAkACwDnm1bn8rLutaJiDqwD1jaUeeXgC9FxJG8/lSfYy6YVgAY6bgLCHAayMySUSlQp1tuvvNTctY6kl5Olha6eg7HbO27gSxVxMUXX9zvXAtp3e5ZKR09jdZytdHkDMpd9zMzGyRFWgBTwEVt68uBXb3qSKoA5wDP5+vLgQeAt0bE19rqL+9zTAAi4s6IGIuIsdHR0QKn21+rBdA+JWTrjiDfCWRmqSgSALYCqyStlDQMrAfGO+qMk3XyAlwLPBwRIelc4JPALRHx963KEfEtYL+k1+R3/7wV+OsTvJbCpgNAx5PA4BSQmaWjbwDIc/o3AQ8BTwIfi4jtkm6T9Ma82l3AUkmTwO8ArVtFbwIuBd4h6dH8dUG+7W3AB4FJ4GvAg/N1Uf20UkDtt4G2UkBuAZhZKor0ARARm4HNHWW3ti0fBq7rst+7gHf1OOYEcNlcTna+1Lu0AFopIM8KZmapSPJJ4NaHfKXkFJCZpSvJANBKAQ1XnAIys3QlGQC6pYCGnAIys8QkGQC6pYCGnQIys8QkGQC6pYBaTwUfrjVOyTmZmZ1sSQaAbimgM4azp38PVR0AzCwNSQaAWpcU0JLh7I7YQ9X6KTknM7OTLdEAcGwKaPFI1gI46BaAmSUi0QAwSwvgiFsAZpaGpANA+2BwZwy5BWBmaUk0ABw7FlCpJBYPl90CMLNkJBoA8hZAaeblLx6uuAVgZslIOwBUZl7+kpGy7wIys2QkGgCOTQFB3gI44haAmaUh0QDQPQW0ZNgtADNLR7IBoFwSpVJHC2DEfQBmlo4kA0C9EcekfyBrAbzgFoCZJSLJAFBtNI9J/4D7AMwsLYUCgKS1knZImpR0c5ftI5Luy7dvkbQiL18q6XOSDkh6b8c+n8+P2TlX8IKrN+KYO4DAdwGZWVr6zgksqQzcAVwFTAFbJY1HxBNt1W4E9kbEpZLWAxuBNwOHgXeQzf3bbf7ft+RzA59UtUZzegawdn4OwMxSUqQFsAaYjIidEVEF7gXWddRZB9yTL98PXClJEXEwIv6OLBD8wKg2mjOGgm5ZMlymWm96WkgzS0KRALAMeLZtfSov61onIurAPmBpgWP/WZ7+eYekY7+SL5B6IxjukgJaPNIaEtqtADMbfEUCQLcP5s55E4vU6fSWiHgF8Pr89Stdf7m0QdKEpIndu3f3PdkieqWAlkxPCuN+ADMbfEUCwBRwUdv6cmBXrzqSKsA5wPOzHTQivpn/3A98lCzV1K3enRExFhFjo6OjBU63v1qPFFCrBeA7gcwsBUUCwFZglaSVkoaB9cB4R51x4IZ8+Vrg4Yjo2QKQVJF0fr48BPwC8PhcT/541XrdBeQWgJklpO9dQBFRl3QT8BBQBu6OiO2SbgMmImIcuAvYJGmS7Jv/+tb+kp4GzgaGJb0JuBp4Bngo//AvA58BPjCvVzaLWqPJUI+7gMAtADNLQ98AABARm4HNHWW3ti0fBq7rse+KHoe9vNgpzr/sSeBuD4K5BWBm6Uj2SeBKt6EgPC+wmSUkyQBQazQZ7toC8LzAZpaO
JANArxRQa2J4twDMLAVJBoBajxTQGa0+ALcAzCwBSQaAao8U0HClxHC55BaAmSUhyQDQKwUEsHjEcwKYWRqSDAC9UkCQ9QO4BWBmKUgyAPQaDRSyZwH8HICZpSDJANBrSkjI5wX2k8BmloAkA0CvweAgGw/ILQAzS0FyASAiqDdn6QT2vMBmlojkAkCtkQ1S2isF5HmBzSwVCQaAbLrHWVsAvgvIzBKQXACoT7cAZukD8JPAZpaA5AJAdboF0PsuoEO1Bs1mvxktzcxOb8kFgH4poCXDZSLgcN1pIDMbbMkFgFYKqNJzKAjPCmZmaUguAPRLAXleYDNLRaEAIGmtpB2SJiXd3GX7iKT78u1bJK3Iy5dK+pykA5Le27HP5ZIey/d5j6Tun8jzrJUC6jYaKHheYDNLR98AIKkM3AFcA6wGrpe0uqPajcDeiLgUuB3YmJcfBt4BvL3Lod8HbABW5a+1x3MBc9U3BeQWgJklokgLYA0wGRE7I6IK3Aus66izDrgnX74fuFKSIuJgRPwdWSCYJulC4OyI+EJEBPAh4E0nciFF9U0BeV5gM0tEkQCwDHi2bX0qL+taJyLqwD5gaZ9jTvU55oKoF0wBeU4AMxt0RQJAt6/KnTfJF6lzXPUlbZA0IWli9+7dsxyymFqfFNAS9wGYWSKKBIAp4KK29eXArl51JFWAc4Dn+xxzeZ9jAhARd0bEWESMjY6OFjjd2dX6PgjmPgAzS0ORALAVWCVppaRhYD0w3lFnHLghX74WeDjP7XcVEd8C9kt6TX73z1uBv57z2R+H/g+C5S0A9wGY2YCr9KsQEXVJNwEPAWXg7ojYLuk2YCIixoG7gE2SJsm++a9v7S/paeBsYFjSm4CrI+IJ4G3AnwNnAA/mrwVX6zMW0KKhEhIeD8jMBl7fAAAQEZuBzR1lt7YtHwau67Hvih7lE8BlRU90vvRLAUnyvMBmloTkngTulwICzwtsZmlIMADMngICWDJS4YDvAjKzAZdcAKg3Z08BAZw5UuGg+wDMbMAlFwCq9SwA9HoOALKngQ8cdgAws8GWXABopYB6PQkMcObIEAfcAjCzAZdcAKj3uQsI4KxFFQcAMxt4yQWA1l1A5dLsfQAOAGY26JILANVGMFwuMdv0A0tGKu4DMLOBl1wAqDeaVGZJ/0CWAqo2mhzxvMBmNsCSCwC1RnPWZwAgSwGBRwQ1s8GWXACoNqJvAFiSBwCngcxskCUXAOqN5qx3AMHRFsD+I7WTcUpmZqdEcgGgSArorEVOAZnZ4EsvADSjcAvggFsAZjbA0gsA9f4tgFYfwH73AZjZAEsvAMwhBeSHwcxskCUXAOrN6PscwJm+C8jMEpBcAKgWSAEtHi4j4SGhzWygFQoAktZK2iFpUtLNXbaPSLov375F0oq2bbfk5Tsk/Vxb+dOSHpP0qKSJ+biYImqN5qwjgUI2LeSZwxX2OwCY2QDrOyewpDJwB3AVMAVslTSeT+zeciOwNyIulbQe2Ai8WdJqsgniXw68BPiMpB+KiNb9lT8dEXvm8Xr6KpICAjhzkccDMrPBVqQFsAaYjIidEVEF7gXWddRZB9yTL98PXKlstLV1wL0RcSQivg5M5sc7ZYqkgMAjgprZ4CsSAJYBz7atT+VlXetERB3YByzts28An5a0TdKGuZ/68ak3o28KCFrzAjsAmNng6psCArrlS6Jgndn2fW1E7JJ0AfA3kp6KiEeO+eVZcNgAcPHFFxc43dnVCowGCp4UxswGX5EWwBRwUdv6cmBXrzqSKsA5wPOz7RsRrZ/PAQ/QIzUUEXdGxFhEjI2OjhY43dkVeRAM8hSQ+wDMbIAVCQBbgVWSVkoaJuvUHe+oMw7ckC9fCzwcEZGXr8/vEloJrAL+UdISSWcBSFoCXA08fuKX01+RoSDAfQBmNvj6poAioi7pJuAhoAzcHRHbJd0GTETEOHAXsEnSJNk3//X5vtslfQx4AqgDvxkRDUkvBh7IZ+WqAB+N
iE8twPUdo8iTwOA+ADMbfEX6AIiIzcDmjrJb25YPA9f12PcPgT/sKNsJvHKuJzsfiqaAWn0AETHr9JFmZqer5J4ErhV9DmCkQgQcqnpIaDMbTEkFgIgo9CQwtM0K5jSQmQ2opAJAoxlEUDgFBA4AZja4kgoA9Wb2CELRFBB4RFAzG1xJBYBqowlQKAV0plNAZjbgkgoA9UbeAij1bwF4VjAzG3RJBYBa3gIYqhTvA/CcAGY2qJIKANV6HgCcAjIzSysAtDqBCw0F4buAzGzAJRUAplNABVoAI5UyQ2W5D8DMBlZSAWAuKSDI0kDuAzCzQZVUAJhLCgjyaSEdAMxsQCUVAOaSAgJYMlxxCsjMBlZaAWCOKaBsRNAaADu+vZ/DNQ8MZ2aDI5kAUG80qc01BTRS4eCRBk/s+j5r//gR7nxk50KeopnZSTXwAaDZDP71B7fwrk8+OecWwJmLhjhwpM67P/NPRMAnv/KthTxVM7OTauADQKkklp45zMe/OMX+PJ1TKRW9C6jM1N5DfPqJ77Dy/CXs+M5+Jp87sJCna2Z20gx8AAB4yxWXsP9wnb/64jcBGK4UTwHVGsE5ZwzxgbdeDsCDj7kVYGaDoVAAkLRW0g5Jk5Ju7rJ9RNJ9+fYtkla0bbslL98h6eeKHnM+/diK81h1wZn87Vf3AHN5DmAIgA0/+VIuveAsxi45j82PfxuAf9j5Xda++xG2PbN3YU7azGyB9f0klFQG7gCuAVYD10ta3VHtRmBvRFwK3A5szPddTTZB/MuBtcCfSCoXPOa8kcQvX3Hx9HqlYAC4/JLzWLPyRfzqT6wA4JpXXMiT3/o+j/zTbt724W089e39/PqHt/Gd7x8G4Ot7DvLxbVPTt5uamR2PF6oN/tuDT7LpC08TEQv2e4pMCr8GmMwnckfSvcA64Im2OuuAd+bL9wPvVTaT+jrg3og4Anxd0mR+PAocc179q1cvZ+OnnuJwrVn4LqDXrTqf1606f3p97WX/jD/4xBPceM9WFg9XeP+vXM5v3/cov7ZpGz/7sgt4z8OTVOtNPvC3O/mDN13Gdw9U+fgXpzhca7DuVcu4avWLeXrPQb70jb0sGalw+SXnsfy8xXzj+UN84/mDLF0ywj+/4EwqJTG19xDP7T/CheecwbJzzwDguwePcKja4PwzRzh7UYVqo8n3DtWIgHMXD7FoqMzhWoN9L9QYKpc4e1GFSrlEvdHkcD2bCnM4Hwm10Tw6PWYpHx671mjSaAYjlRLZ29f92YlGMyiJ6TqQdba3tBXPqGOWkojgYLXBwSN1loxUWDJc5oVag2e+e4h9L9RYft4ZvPjsRXxlah+f3/EcL1Qb/OQPjXLu4iF+52Nfnu5v/PyO3fzP617Ji5YMz/s5FgkAy4Bn29angCt61YmIuqR9wNK8/B869l2WL/c75rw6Z/EQb/iRl/CX26YYKZeP6xjLzj2DV110Ll+Z+h7v/eVX8/pVo/zv617J2z7yRR599nv8yx+5kJ992QVsfHAH1/3pFwAYPWuExcNl3v6XXy78eyRoD/olQbPjS0ClpOknm1uGyqLWmFk2XC5NT4TTqiNpelgMgJFKiUYzZhxv0VCJWiNo5GXlkqaP3yrLggczyropCUoSJQkpu76SRAQEkf8EApoxM5AIkf83vR8c/ftEtufRY+THad82fTzUWpj+Ic3c1h6vOn/HzDI6Fo79fZ2mf//Mwm6LdMbN9n27xdTocR4zy+mxMvMEWofv9bc53pDe7Vf2+nLb72/ZS9e/cWvbLCe+EF9TjtSbM/5Ndfs321JSlpn44N99Hcg+Nz584xV8bfcB/vCTT3LNHz/CA7/xWl6SfxmcL0UCQLe/TedV9KrTq7xbDqbrX0bSBmBDvnpA0o4e51nE+cCeczeewBFyP/nfjy37k/zV7pkT/1Un4nxgz6k9hZPO1zz4Bv56nwFe/19nFJ2/7HdP6Jov6VZYJABMARe1rS8HdvWoMyWpApwDPN9n337H
BCAi7gTuLHCefUmaiIix+TjW6SC16wVfcwpSu15YuGsu0hu6FVglaaWkYbJO3fGOOuPADfnytcDDkfVcjAPr87uEVgKrgH8seEwzM1tAfVsAeU7/JuAhoAzcHRHbJd0GTETEOHAXsCnv5H2e7AOdvN7HyDp368BvRkQDoNsx5//yzMysFy3kLUY/aCRtyFNKSUjtesHXnILUrhcW7pqTCgBmZnZUEkNBmJnZsZIIACdz2ImTSdJFkj4n6UlJ2yX9Vl7+Ikl/I+mr+c/z8nJJek/+d/iKpB89tVdwfPKnyb8k6RP5+sp8CJKv5kOSDOflPYcoOZ1IOlfS/ZKeyt/rHx/k91jSb+f/Pz8u6S8kLRq091jS3ZKek/R4W9mc31NJN+T1vyrphm6/azYDHwB0koedOMnqwH+KiJcBrwF+M7+2m4HPRsQq4LP5OmR/g1X5awPwvpN/yvPit4An29Y3Arfn17uXbGgS6DFEyWnoj4FPRcS/AF5Jdu0D+R5LWgb8B2AsIi4ju0lkPYP3Hv852fA47eb0nkp6EfB7ZA/RrgF+rxU0CouIgX4BPw481LZ+C3DLqT6vBbrWvwauAnYAF+ZlFwI78uX3A9e31Z+ud7q8yJ4Z+SzwM8AnyB423ANUOt9vsrvMfjxfruT1dKqvYY7Xezbw9c7zHtT3mKOjCrwof88+AfzcIL7HwArg8eN9T4Hrgfe3lc+oV+Q18C0Aug9lsaxH3dNW3vR9NbAFeHFEfAsg/3lBXm0Q/hbvBv4z0BrLYinwvYhoTd7cfk0zhigBWkOUnE5eCuwG/ixPe31Q0hIG9D2OiG8C/wv4BvAtsvdsG4P9HrfM9T094fc6hQBQZCiL05qkM4GPA/8xIr4/W9UuZafN30LSLwDPRcS29uIuVaPAttNFBfhR4H0R8WrgIEdTA92c1tecpzDWASuBlwBLyFIgnQbpPe5nrkPtFJZCACgylMVpS9IQ2Yf/RyLir/Li70i6MN9+IfBcXn66/y1eC7xR0tPAvWRpoHcD5yobggRmXtP09WrmECWnkylgKiK25Ov3kwWEQX2Pfxb4ekTsjoga8FfATzDY73HLXN/TE36vUwgAAzvshCSRPYX9ZET8Udum9qE5biDrG2iVvzW/q35sK+4AAAIFSURBVOA1wL5Wk/N0EBG3RMTyiFhB9j4+HBFvAT5HNgQJHHu93YYoOW1ExLeBZyX9cF50JdmT9QP5HpOlfl4jaXH+/3fregf2PW4z1/f0IeBqSeflLaer87LiTnVHyEnqbPl54J+ArwG/e6rPZx6v63VkTb6vAI/mr58ny4F+Fvhq/vNFeX2R3RH1NeAxsjstTvl1HOe1/xTwiXz5pWRjTE0CfwmM5OWL8vXJfPtLT/V5H+e1vgqYyN/n/wucN8jvMfD7wFPA48AmYGTQ3mPgL8j6OGpk3+RvPJ73FPi3+bVPAv9mrufhJ4HNzBKVQgrIzMy6cAAwM0uUA4CZWaIcAMzMEuUAYGaWqCJzAptZD5LeCRwgG4Pm0xGxKy//IPBHEfHEKTw9s1k5AJjNj18lu299F0BE/LtTejZmBTgFZDZHkn5X2fwSnwFaT+iOAR+R9KikMyR9XtLYKTxNs77cAjCbA0mXkw1D8Wqyfz9fJButcgJ4e0RM5PVO2TmaFeUAYDY3rwceiIhDAJIGYlwpS5NTQGZz5/FTbCA4AJjNzSPAL+Z5/rOAN+Tl+4GzTt1pmc2dU0BmcxARX5R0H9nIq88Af5tv+nPgTyW9QDZlodkPPI8GamaWKKeAzMwS5QBgZpYoBwAzs0Q5AJiZJcoBwMwsUQ4AZmaJcgAwM0uUA4CZWaL+P9ZzDpknLxFwAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt    # 可视化\n",
    "# 在Jupyter notebook里嵌入图片\n",
    "%matplotlib inline\n",
    "\n",
    "ax = sns.distplot(data['dti'],kde=True,hist=False)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 226,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEGCAYAAABsLkJ6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3deXhc9X3v8fd3Nu2LLcmSbdmWF9nGMrEBx9gQ9s1kwclTeALlNrTl1m0anjbNTVNy23ATnjS99LYhyQ1NS0ITwk0KlCTFTQwmYTMhYCywvMiLLK+SZWuxdslaRvO9f8yROxkka2SPdGb5vp5Hz5w585uj7/HAfHR+53d+R1QVY4wx6cfjdgHGGGPcYQFgjDFpygLAGGPSlAWAMcakKQsAY4xJUz63C5iM4uJiraiocLsMY4xJKu+++26bqpZEr0+qAKioqKC6utrtMowxJqmIyPGx1lsXkDHGpCkLAGOMSVMWAMYYk6YsAIwxJk1ZABhjTJqyADDGmDRlAWCMMWnKAiCFhEI2tbcxJnZJdSGYCVNVjp3p541DrWyra6O+pYeWnkECPg9P/sFaVs0rdLtEY0wSiOkIQEQ2iMhBEakXkQfHeD1DRJ5xXt8uIhVRr88XkV4R+Xys2zTvV9fcwxd/uodr/v5VbviH13jo+VoOnO5m5dwC7lk7n0yfl8//+y4GgyNul2qMSQITHgGIiBd4DLgFaAR2iMhmVd0X0ex+oENVl4jI3cAjwCcjXn8UeGGS2zQOVeWZHQ38r821+DzC1UuK+ePrFnPNkmIWFGUjIgB8qLKYP/j+Dr718iH+8rblLldtjEl0sXQBrQXqVfUIgIg8DWwEIr+sNwJfdpafA74tIqKqKiIfB44AfZPcpiH85f9XP9nNs9WNXL2kiEc/uZpZeZljtr1h2SzuuqKcf379CBuqZnNpecE0V2uMSSaxdAHNBRoinjc668Zso6pBoAsoEpEc4K+Ar1zANgEQkU0iUi0i1a2trTGUm1p+8JtjPFvdyKevX8wP//DKcb/8R/3NR1dQmOXnn16rn6YKjTHJKpYAkDHWRQ83Ga/NV4BHVbX3ArYZXqn6uKquUdU1JSXvm800pe092cXfbTnAzZfM4gu3LcPrGeuf7bcVZPm5/dIyXjvYysCwnQswxowvlgBoBOZFPC8HmsZrIyI+oABoB64E/l5EjgGfBf6niDwQ4zbTWu9gkAd+/B5FuQH+z52rzvXzx+K2qjLODo+wrS79jpiMMbGLJQB2AJUislBEAsDdwOaoNpuB+5zlO4FXNOwaVa1Q1QrgG8DXVPXbMW4zrf14+3GOnenn0U+uZkZOYFLvXbeoiPxMH1trm6eoOmNMKpjwJLCqBp2/2rcCXuBfVbVWRB4GqlV1M/AE8JSI1BP+y//uC9nmRe5LyhgJKU+9fZy1C2eyblHRpN/v93q4+ZJSXj7QTHAkhM9r1/sZY94vpgvBVHULsCVq3UMRywPAXRNs48sTbdOEvXawhYb2s/zVhgsfynlrVRk/3XmSd462c9WS4jhWZ4xJFfanYQL64VvHmZWXwW1VZRe8jeuWlpDp9/Bi7ek4VmaMSSUWAAnmaFsfr9e18rtXzsd/EV03WQEv1y0t4aXaZlRtjiBjzPtZACSYH719HJ9H+N218y96WzdfUsrp7gH2neqOQ2XGmFRjAZBAVJUXa09z3dISZuWf/4KvWFy/bBYArx5ouehtGWNSjwVAAjna1kdjx1muXxafC95K8jL4QHkBrx606wGMMe9nAZBARi/cum7prLht8/pls9h5ooOOvqG4bdMYkxosABLI63WtVBRlM78oO27bvGFZCSGFbYfsKMAY89ssABLEYHCEt4+0c+3S+M53tKq8kKKcgJ0HMMa8jwVAgqg+1sHZ4RGui3MAeDzCdUtLeL2ulRG7ZaQxJoIFQIJ4va4Vv1cuaOqHiVy/fBYd/cPUNHTE
fdvGmORlAZAgttW1smbBTHIy4n+b5usqS/B5hF/us24gY8x/sQBIAM3dAxw43cN1cRr+Ga0g28/6xUW8uPeUXRVsjDnHAiABvO4M/7y2cupueHNbVRnHzvRT1xx9bx5jTLqyAEgA2+paKcnL4JLZeVP2O25dUYoIvLjXJoczxoRZALhsJKT8ur6NaytLJnXXr8malZ/JFfNn2OygxphzLABctruxk87+Ya5dOvVz9m9YWcb+U90cP9M35b/LGJP4YgoAEdkgIgdFpF5EHhzj9QwRecZ5fbuIVDjr14pIjfOzS0Q+EfGeYyKyx3mtOl47lGy21bUhAtdMYf//qNH7C2y1owBjDDEEgIh4gceA24EVwD0isiKq2f1Ah6ouAR4FHnHW7wXWqOpqYAPwL85N40fdoKqrVXXNRe5H0tp2qJUPzC1g5iTv+3sh5s3MpmpOvp0HMMYAsR0BrAXqVfWIqg4BTwMbo9psBJ50lp8DbhIRUdV+VQ066zMBG4MYoat/mJ0nOuI+/cP5bKgq470TnTR3D0zb7zTGJKZYAmAu0BDxvNFZN2Yb5wu/CygCEJErRaQW2AP8SUQgKPCSiLwrIpvG++UisklEqkWkurU1tSY0e/NwGyEl7tM/nM+GleFuoJesG8iYtBdLAIw1NCX6L/lx26jqdlWtAj4IfFFERu90crWqXk64a+kzInLtWL9cVR9X1TWquqakZPq+KKfDtrpW8jJ9rJ5XOG2/s7I0j8UlOTYayBgTUwA0AvMinpcDTeO1cfr4C4D2yAaquh/oA1Y6z5ucxxbgZ4S7mtKGqvJ6XStXLy7GdxH3/r0QG1aW8faRdrtHgDFpLpZvnh1ApYgsFJEAcDewOarNZuA+Z/lO4BVVVec9PgARWQAsA46JSI6I5Dnrc4BbCZ8wThv1Lb2c6hqYsukfzmdD1WxGQsqv9jdP++82xiSOCQPA6bN/ANgK7AeeVdVaEXlYRO5wmj0BFIlIPfA5YHSo6IeAXSJSQ/iv/D9V1TagFPi1iOwC3gF+oaovxnPHEt256R+msf9/1Mq5+cwtzLLhoMakuZimnlTVLcCWqHUPRSwPAHeN8b6ngKfGWH8EWDXZYlPJ63WtLC7JYW5h1rT/bhHhtqoy/t/24/QOBsmdghlIjTGJz64EdsHA8AjvHG2P671/J2vDyjKGgiFeO2hTRBuTriwAXLD9aDuDwdC0TP8wnisWzKA4N2AXhRmTxiwAXPD6wVYCPg9XLoz/3b9i5fUIt6wo49UDLQwMj7hWhzHGPRYALnjjUCtXLpxJVsDrah0bVpbRNzTCm/VtrtZhjHGHBcA0a+sd5FBLL1ctdq/7Z9T6RUXkZfqsG8iYNGUBMM12HA1fH7d24UyXK4GAz8Mtl5Tyy/3NDI+E3C7HGDPNLACm2TvH2sn0e7h0boHbpQBw28oyOvuHeedo+8SNjTEpxQJgmr1ztJ3L588g4EuMf/prK0vI8nt5Ye8pt0sxxkyzxPgWShPdA8PsO9WdEN0/o7ICXq5fVsLW2mZCIZut25h0YgEwjd491oFqYvT/R7r90tm09gzy7okOt0sxxkwjC4BptP1oO36vcNm8GW6X8ltuXD6LgM/DC3tsNJAx6cQCYBq9c/QMl84tcH38f7TcDB/XVhbz4t5TqFo3kDHpwgJgmpwdGmF3YxdrXbz693w2rJxNU9cAuxq73C7FGDNNLACmyc6GDoIhZe3CxOr+GXXLJaX4PGKjgYxJIxYA06SmoROAy+cnZgAUZPu5akkxL+w5bd1AxqQJC4BpsruhiwVF2RRmB9wuZVwfvXQ2J9r7rRvImDRhATBNdjd2sqp8+m7+fiFuW1lGwOvhP3aedLsUY8w0iCkARGSDiBwUkXoReXCM1zNE5Bnn9e0iUuGsXysiNc7PLhH5RKzbTCUtPQM0dQ3wgfLEmP5hPAVZfm5cPouf724iaHMDGZPyJgwAEfECjwG3AyuAe0RkRVSz+4EOVV0CPAo84qzfC6xR
1dXABuBfRMQX4zZTxu6GcJfK6nmJfQQA8PHL5tDWO8Sbh8+4XYoxZorFcgSwFqhX1SOqOgQ8DWyMarMReNJZfg64SUREVfudm8oDZAKjZxdj2WbK2NXYidcjVM1J7CMAgOuXzSIv08fz1g1kTMqLJQDmAg0RzxuddWO2cb7wu4AiABG5UkRqgT3Anzivx7JNnPdvEpFqEalubW2NodzEs6uxi6WleQl3AdhYMv1ePrxyNltrT3N2yO4UZkwqiyUAZIx10eMEx22jqttVtQr4IPBFEcmMcZs4739cVdeo6pqSkpIYyk0squqcAE78v/5HbbxsDn1DI/xqf7PbpRhjplAsAdAIzIt4Xg40jddGRHxAAfBbE8yr6n6gD1gZ4zZTwon2fjr7h1mVBP3/o65cWERZfibP11g3kDGpLJYA2AFUishCEQkAdwObo9psBu5zlu8EXlFVdd7jAxCRBcAy4FiM20wJoxeAJfoIoEhej/CxVbN57WArHX1DbpdjjJkiEwaA02f/ALAV2A88q6q1IvKwiNzhNHsCKBKReuBzwOiwzg8Bu0SkBvgZ8Keq2jbeNuO5Y4lid2MXmX4PS0vz3C5lUjaunkswpPxij00NYUyq8sXSSFW3AFui1j0UsTwA3DXG+54Cnop1m6loT2MXK2bn4/cm1zV3VXPyWTIrl+drTvLf1i1wuxxjzBRIrm+lJBMKKftOdbMyQe7/OxkiwsdXz2HHsQ4aO/rdLscYMwUsAKbQ8fZ+egeDrEyC8f9j2bg6PDJ3866UPD9vTNqzAJhCe0+GrwCumpvvciUXZt7MbK5YMIPNNRYAxqQiC4ApVNvUjd8rVM5KrhPAkW5fWcaB0z0ca+tzuxRjTJxZAEyh2qYulpXlEfAl7z/zbVVlALxYa/cLNibVJO83U4JTVfae7Era/v9R82Zmc+ncAl7cawFgTKqxAJgiTV0DdPQPUzUnOfv/I21YWUZNQyenus66XYoxJo4sAKZI7bkTwMl9BADhAADYakcBxqQUC4ApsrepG4/AJWXJfwSwuCSXylm5vGABYExKsQCYIrUnu1hckpsUU0DH4vaVZew41k5b76DbpRhj4sQCYIrUNiXnFcDjubWqjJDCaweT854Mxpj3swCYAq09g5zuHkiJE8CjqubkU5qfwSsH7B4BxqQKC4ApUNsUPgGcSkcAIsKNy2exra6NoaDdMN6YVGABMAVqm7oBWJFCRwAANy0vpXcwyDtH2ydubIxJeBYAU6C2qYsFRdnkZ/rdLiWurl5STIbPw8vWDWRMSrAAmAJ7T3Yn/RXAY8kKeLlqcREv729BdcxbOBtjkkhMASAiG0TkoIjUi8iDY7yeISLPOK9vF5EKZ/0tIvKuiOxxHm+MeM9rzjZrnJ9Z8dopN3WdHeZEe3/Kdf+MuvGSUk6093O41SaHMybZTRgAIuIFHgNuB1YA94jIiqhm9wMdqroEeBR4xFnfBnxMVS8lfM/g6LuD3auqq52flovYj4Sxz+n/T6UTwJFuWh7O6Zf3WzeQMckuliOAtUC9qh5R1SHgaWBjVJuNwJPO8nPATSIiqrpTVUcnk68FMkUkIx6FJ6rREUCpNAQ00pzCLJaX5bHtkF0PYEyyiyUA5gINEc8bnXVjtnFu+N4FFEW1+R1gp6pGXkr6faf750siImP9chHZJCLVIlLd2pr4Xzq1Td2U5WdSnJu6OXdNZTE7jnZwdmjE7VKMMRchlgAY64s5+gzgeduISBXhbqE/jnj9Xqdr6Brn5/fG+uWq+riqrlHVNSUlJTGU6669J7tYmaR3AIvVhypLGBoJ8c4xGw5qTDKLJQAagXkRz8uB6HsEnmsjIj6gAGh3npcDPwM+paqHR9+gqiedxx7gx4S7mpJa/1CQw629VKXgCKBIaytmEvB5eKMu8Y/IjDHjiyUAdgCVIrJQRALA3cDmqDabCZ/kBbgTeEVVVUQKgV8AX1TVN0cbi4hPRIqdZT/wUWDvxe2K+/af6iGk
qdv/Pyor4GVtxUzeONTmdinGmIswYQA4ffoPAFuB/cCzqlorIg+LyB1OsyeAIhGpBz4HjA4VfQBYAnwparhnBrBVRHYDNcBJ4Lvx3DE37EvBKSDGc01lMQebe2juHnC7FGPMBfLF0khVtwBbotY9FLE8ANw1xvu+Cnx1nM1eEXuZyWHvyW5m5gSYXZDpdilT7prKEv7uhQO8caiNO68od7scY8wFsCuB42hvUxdVc/IZZ0BTSllelkdxbgZv2HBQY5KWBUCcDAVD1DX3pPwJ4FEej3BNZTG/PtRm00IYk6QsAOKkrrmH4RFN+SGgkdYvLuJM3xB1zb1ul2KMuQAWAHHyX1cAp8cRAMBVi8PX+r112EYDGZOMLADipLapm9wMHwtmZrtdyrQpn5HNvJlZ/ObwGbdLMcZcAAuAONl7sosVc/LxeFL/BHCk9YuK2H60nVDIzgMYk2wsAOJgJKTsP9WTkvcAmMhVi4vpOjvMvlPdbpdijJkkC4A4ONrWy9nhkZS/Angs68+dB7BuIGOSjQVAHOw9mdr3ADif0vxMFhXn8NYRCwBjko0FQBzsPdlFhs/D4pIct0txxfrFRbxztJ3gSMjtUowxk2ABEAd7m7pYPjsfnzc9/znXLy6idzDInpNdbpdijJmE9PzGiqNQSKk92c3KNOz/H7VukXMewLqBjEkqFgAX6UhbHz2DQVbPK3S7FNcU52awrDTPTgQbk2QsAC7SroZOgLQOAAh3A1Uf62AoaOcBjEkWFgAXaVdjJ7kZPhaV5LpdiqvWLSri7PAIuxo73S7FGBMjC4CLtKuhk5Vz8/Gm2RXA0dYtmomIXQ9gTDKxALgIg8ER9p3qZlWad/8AFGYHWDE7n9/YxHDGJI2YAkBENojIQRGpF5EHx3g9Q0SecV7fLiIVzvpbRORdEdnjPN4Y8Z4rnPX1IvItScK7qOw/FZ4CenW5BQCE5wV670QnA8MjbpdijInBhAEgIl7gMeB2YAVwj4isiGp2P9ChqkuAR4FHnPVtwMdU9VLCN41/KuI93wE2AZXOz4aL2A9XjJ4AtiOAsKuWFDEUDPHeiQ63SzHGxCCWI4C1QL2qHlHVIeBpYGNUm43Ak87yc8BNIiKqulNVm5z1tUCmc7QwG8hX1bc0fDupHwIfv+i9mWa7GjopyctIi3sAx+KDFTPxesTOAxiTJGIJgLlAQ8TzRmfdmG1UNQh0AUVRbX4H2Kmqg077xgm2CYCIbBKRahGpbm1NrPvP1jR2sqq8MC3uARyLvEw/K+cWWAAYkyRiCYCxvt2iJ38/bxsRqSLcLfTHk9hmeKXq46q6RlXXlJSUxFDu9Og6O8yR1j5Wz0u/CeDO56rFRdQ0dNI/FHS7FGPMBGIJgEZgXsTzcqBpvDYi4gMKgHbneTnwM+BTqno4on35BNtMaHsaw/PeWP//b1u/qIhgSNlxzM4DGJPoYgmAHUCliCwUkQBwN7A5qs1mwid5Ae4EXlFVFZFC4BfAF1X1zdHGqnoK6BGRdc7on08Bz1/kvkyr0QuePjDXAiDSmooZ+L12HsCYZDBhADh9+g8AW4H9wLOqWisiD4vIHU6zJ4AiEakHPgeMDhV9AFgCfElEapyfWc5rnwa+B9QDh4EX4rVT06GmoZNFxTkUZPvdLiWhZAd8rJ5XaBPDGZMEfLE0UtUtwJaodQ9FLA8Ad43xvq8CXx1nm9XAyskUmyhUlZqGTj60pNjtUhLS+kVFfPvVeroHhsnPtIA0JlHZlcAX4HT3AK09g6wqtxPAY1m3uIiQwo6j7W6XYow5DwuAC2AXgJ3f5fNnEPB57DyAMQnOAuAC1DR04fcKl8xO35vAnE+m38sV82fwGwsAYxKaBcAF2NXQySWz88n0e90uJWGtX1zE/tPddPYPuV2KMWYcFgCTNBJS9pzsYpVNAHdeVy0uQhXePmLnAYxJVBYAk3SktZfewaD1/0/gA+WFZPm9vGXTQxuTsCwAJqnm3C0gbQTQ+QR8HtZU
zLDrAYxJYBYAk3TuFpDF6X0LyFhctbiYuuZeWnsG3S7FGDMGC4BJqmno5APlBXjS/BaQsVi/ODwh7Nt2FGBMQrIAmISB4REOnOphtfX/x2TlnHxyM3zWDWRMgrIAmIS9J7sIhtQCIEY+r4crF87kbbsewJiEZAEwCTtPOCeA51sAxGr94iKOtPVxumvA7VKMMVEsACahpqGTuYVZzMqzW0DGat2i8HmAt47YcFBjEo0FwCTUNHTaX/+TtGJ2PjOy/bxxyALAmERjARCjlp4BTnae5TLr/58Uj0e4prKEbXWthEJj3vXTGOMSC4AY1Yz2/1sATNr1y0po6x2itqnb7VKMMRFiCgAR2SAiB0WkXkQeHOP1DBF5xnl9u4hUOOuLRORVEekVkW9Hvec1Z5vRdwpLSDUNnfg8wsq5dgXwZF27tASAVw+2uFyJMSbShAEgIl7gMeB2YAVwj4isiGp2P9ChqkuAR4FHnPUDwJeAz4+z+XtVdbXzk9DfDjtP2AygF6o4N4NV5QW8ZgFgTEKJ5QhgLVCvqkdUdQh4GtgY1WYj8KSz/Bxwk4iIqvap6q8JB0HSGgkpuxs7rfvnIly3bBY1DZ109Nn00MYkilgCYC7QEPG80Vk3ZhvnJvJdQFEM2/6+0/3zJREZc24FEdkkItUiUt3a2hrDJuOvvqWXvqERC4CLcP2yEkIK2w658xkaY94vlgAY64s5ejhHLG2i3auqlwLXOD+/N1YjVX1cVdeo6pqSkpIJi50KNQ0dAFxmQ0Av2KryQmZk+3n9oAWAMYkilgBoBOZFPC8HmsZrIyI+oAA4751AVPWk89gD/JhwV1NCqmnopCDLz8LiHLdLSVpeZzjoa3WtBEdCbpdjjCG2ANgBVIrIQhEJAHcDm6PabAbuc5bvBF5R1XGPAETEJyLFzrIf+Ciwd7LFT5edJzpZNa+QcXqpTIxuX1lGe9+QTQ5nTIKYMACcPv0HgK3AfuBZVa0VkYdF5A6n2RNAkYjUA58Dzg0VFZFjwNeB3xeRRmcEUQawVUR2AzXASeC78dut+OkbDFLXbDOAxsMNy2eRm+Fjc030AaQxxg2+WBqp6hZgS9S6hyKWB4C7xnlvxTibvSK2Et21u7GLkFr/fzxk+r3cWlXKi7Wn+eonVpLhsyG1xrjJrgSewLlbQNpN4OPijlVz6BkI8pqdDDbGdRYAE6hp6KCiKJsZOQG3S0kJVy8pZmZOgP/cZd1AxrjNAuA8VJWdJ+wCsHjyez18+NIyfrW/mb7BoNvlGJPWLADO41TXAC09gxYAcbZx9VwGhkP8Yvcpt0sxJq1ZAJzHaP//ZfNnuFxJalmzYAbLy/L47htHbIpoY1xkAXAetU1d+DzC8tl5bpeSUkSETdcu4lBLL6/X2clgY9xiAXAeB0/3srA4x4YrToGPrZrD7IJM/mXbYbdLMSZtWQCcx6GWHpaW2l//U8Hv9fCHVy/k7SPt7G7sdLscY9KSBcA4zg6NcKK9n8rSXLdLSVl3r51HXoaPx16td7sUY9KSBcA4Drf2ooodAUyhvEw/f3TtIrbWNvPWYZsfyJjpZgEwjrrmHgCW2hHAlNp07SLmFmbxlf+stVlCjZlmFgDjONjcg98rLCiyKaCnUqbfy19/5BIOnO7h33Y0TPwGY0zcWACM41BzL4tLcvF77Z9oqt2+sox1i2byjy8dpKt/2O1yjEkb9u02jrrmHiqt/39aiAgPfbSKrrPD/NPrdkLYmOliATCGvsEgjR1nWTrL+v+ny4o5+Xzisrl8/81jNHWedbscY9KCBcAY6lt6AewIYJp97paloPD1X9a5XYoxacECYAw2Asgd5TOyue+qBfzkvUYOnO52uxxjUl5MASAiG0TkoIjUi8iDY7yeISLPOK9vF5EKZ32RiLwqIr0i8u2o91whInuc93xLEuiGu3XNPQR8HhsB5ILP3LCE3Awf3/jlIbdLMSblTRgAIuIFHgNuB1YA9zj39Y10P9ChqkuAR4FH
nPUDwJeAz4+x6e8Am4BK52fDhezAVKhzRgB5PQmTSWmjMDvAH1y9kBdrT9tRgDFTLJYjgLVAvaoeUdUh4GlgY1SbjcCTzvJzwE0iIqrap6q/JhwE54jIbCBfVd9SVQV+CHz8YnYknupbeqm0E8Cu+cOrK8jN8PF/X7ERQcZMpVgCYC4QeYVOo7NuzDaqGgS6gKIJttk4wTYBEJFNIlItItWtrVM/dfBQMMSprrNUFGVP+e8yYyvMDnDfVQvYsucU9S09bpdjTMqKJQDG6geJvotHLG0uqL2qPq6qa1R1TUlJyXk2GR9NnWcJKcybaQHgpvs/tIgsv9eOAoyZQrEEQCMwL+J5ORB9R+9zbUTEBxQA7RNss3yCbbriRHs/APMtAFw1MyfAp9ZXsHlXE7VNXW6XY0xKiiUAdgCVIrJQRALA3cDmqDabgfuc5TuBV5y+/TGp6imgR0TWOaN/PgU8P+nqp8C5ALAuINd9+vrFFGT5+dqW/ZznPydjzAWaMACcPv0HgK3AfuBZVa0VkYdF5A6n2RNAkYjUA58Dzg0VFZFjwNeB3xeRxogRRJ8GvgfUA4eBF+KzSxenob2fgNdDaV6m26WkvYIsP39+UyVv1p/h1YMtbpdjTMrxxdJIVbcAW6LWPRSxPADcNc57K8ZZXw2sjLXQ6XKivZ/ymVl4bAhoQrj3ygX88K3jfG3LAa6tLMFnk/MZEzf2f1OUE+391v+fQAI+D3+1YTn1Lb18/81jbpdjTEqxAIigqpw4YwGQaG6rKuXmS2bxDy8d5Ehrr9vlGJMyLAAidJ0dpmcwaAGQYESEr33iUjJ8Hr7w3G5GQnZC2Jh4sACIYENAE9es/Ey+fEcV1cc7+MFvjrldjjEpwQIggg0BTWyfuGwu1y8r4Ru/rONM76Db5RiT9CwAIowGwLwZFgCJSET4m49cQv/wCN982WYLNeZiWQBEOHGmn+LcADkZMY2ONS5YMiuPe9bO40fbT3DYTggbc1EsACKcaO+3OYCSwGdvXkqW38vfbTngdinGJDULgAh2DUByKM7N4NPXL+ZX+5vZ12T3DDDmQlkAOIZHQjR1nrUASBL3XjmfgM/DMztOuF2KMUnLAsBh00Anl8LsABuqyvjZzpMMDI+4XfSKTw8AAA58SURBVI4xSckCwGHXACSfuz84j+6BIC/uPe12KcYkJQsAhwVA8lm3qIj5M7N52rqBjLkgFgCOE6PTQOfbNNDJwuMRPvnBebx9pJ2jbX1ul2NM0rEAcDS091M+IwuvTQOdVO68ohyPwE/fa5y4sTHmt1gAOOwagORUmp/J2oUz2Vpr5wGMmayYAkBENojIQRGpF5EHx3g9Q0SecV7fLiIVEa990Vl/UERui1h/TET2iEiNiFTHY2cuhk0DnbxuXVFGXXOvdQMZM0kTBoCIeIHHgNuBFcA9Ebd1HHU/0KGqS4BHgUec964gfA/hKmAD8E/O9kbdoKqrVXXNRe/JRejqH6Z7IMgCmwQuKd1aVQrAS3YUYMykxHIEsBaoV9UjqjoEPA1sjGqzEXjSWX4OuMm52ftG4GlVHVTVo4Tv/7s2PqXHz/H28F+O1gWUnMpnZFM1J5+X9jW7XYoxSSWWAJgLNEQ8b3TWjdnGuYl8F1A0wXsVeElE3hWRTeP9chHZJCLVIlLd2toaQ7mTZ0NAk99tVWW8d6KDlp4Bt0sxJmnEEgBjDYuJviXTeG3O996rVfVywl1LnxGRa8f65ar6uKquUdU1JSUlMZQ7eeemgbYASFq3VpWiCr/a1+J2KcYkjVgCoBGYF/G8HGgar42I+IACoP1871XV0ccW4Ge42DXU0N5PUU6AXJsGOmktK81jQVE2L+2z8wDGxCqWANgBVIrIQhEJED6puzmqzWbgPmf5TuAVVVVn/d3OKKGFQCXwjojkiEgegIjkALcCey9+dy6MDQFNfiLCrStK+U39GXoGht0ux5ikMGEAOH36DwBbgf3As6paKyIPi8gdTrMngCIR
qQc+BzzovLcWeBbYB7wIfEZVR4BS4Ncisgt4B/iFqr4Y312LnU0DnRpuqypjaCTEawen5lyRMakmpj4PVd0CbIla91DE8gBw1zjv/Vvgb6PWHQFWTbbYqRCeBnqAjassAJLdZfNnUJwbYGvtaT62ao7b5RiT8NL+SuBTnQOMhNSOAFKA1yPcfEkprx1sZTBoU0QbM5G0DwAbAZRabqsqo3cwyG8On3G7FGMSngXA6DUAdhVwSli/uIicgJeXau2iMGMmYgHQ3o/fK5TZNNApIdPv5frls/jlvmZGQtGXqxhjIqV9ANS39DJvZrZNA51Cbl1RSlvvIO8cbXe7FGMSWloHgKpS09DB6vJCt0sxcXTLilKKcgJ85/XDbpdiTEJL6wBoaD9LW+8Qly2Y4XYpJo6yAz7+6NpFbKtr5b0THW6XY0zCSusAGP1yuGK+BUCq+b11C5iZE+CbvzrkdinGJKy0D4CcgJdlZXlul2LiLCfDxx9ds4jX61rZaUcBxowp7QNg1bxCOwGcoj61fgEzsv385XO7aXCG+xpj/kvaBkD/UJD9p3q43Lp/UlZOho/H7r2clu4BPv7Ym7x73EYFGRMpbQNgd2MXIyHl8gU2AiiVXbW4mJ995mryMn3c+c9v8bvffZtndzTQ2jPodmnGuC5tJ8AfPQF82Tw7Akh1i0ty+Y/PXM333zzG8zUn+cJPdgOwZFYuV8yfwdKyPCpn5TKnMIvS/AxyM3yE72hqTGpL3wA43smi4hxm5ATcLsVMg8LsAH9xy1I+e3Mle0928+bhNt4+coaX9p3mmeqG32qbHfBSmp9JSW4GM3MCzMgJUOQ8zsrLoKIoh/lF2RRk+V3aG2PiIy0DIBRSdp7o4Ppls9wuxUwzEeHS8gIuLS/gT65bjKrS1jtEfUsvLT0DNHcP0Nw9SHP3AC09gxxu7aXj+BDtfUNEzywxI9vP/KIcFhZlU1Gcw0Lnp6I4hzw7ijBJIC0D4JnqBs70DXHD8qm5x7BJHiJCSV4GJXkZ520XCindA8Oc6hrg+Jl+jp/p43h7P8fa+thxrIPndzWhEQER8HrIz/KTn+WjIMtPQZafwiw/hdkBCrPDyzNyAswpzGJ2QSal+Zn4vWl7Ss64JO0CoKV7gK9t2c+6RTP5yKWz3S7HJAmPR5wv7wCXzM5/3+sDwyOcaO/nSGsfx8/00dE/TNfZYbrPDtM9MMyZ3iGOtPbR2T9E90Dwfe8XgVl5GcwuyGJOYTgQ8jP95GX6zj3mZfrJzfQ5yz7yMvxk+j12pGEuWEwBICIbgG8CXuB7qvq/o17PAH4IXAGcAT6pqsec174I3A+MAH+mqltj2eZU+cp/7mMwGOJrn7jU/scxcZPp97K0NI+lpRNfVBgcCdE9EORM7yBNXQOc6jxLU9cATZ1nOd01wIHTPWyra6N38P1BEc3rEXIzfORm+Aj4PPi94jx6CHg9BHzhR7+zPPqY4fNQmO1nZk441GZk+8nP9OPzCn6vh6FgiIHhEQaGQ5wdHmF4JITPI/h9HvIyfOGjmOwABVl+O3JJYhMGgIh4gceAW4BGYIeIbFbVfRHN7gc6VHWJiNwNPAJ8UkRWEL6JfBUwB/iViCx13jPRNuNmeCTEW4fP8PPdTfxizyk+f+tSFpXkTsWvMmZCPq+HmTkBZuYEqDxPYIyElN6BIN0Dw/QMBOkZGKZ3MBheHgzSOxCkd3DYeRxhaCTEcDAUfhwJMRgM0TsYZCgYfh5+VAaDIQaHR+iJIWBikRPwEvB58Ho8+DyCzyv4PILXI/g8nvCjsy7yefh1Z13E637vaPvwstfjIfJvNY06F6OMPe23IHgkfHQliPMY7vaLXudxLgaNXO+JWMZ5n+d92xh7eyLOesAjQobfQ4bPS6bfQ6bfS6azPLouw+8l4PXg8YBXwv820/EHaixHAGuBeuc+vojI08BGwjd6H7UR+LKz/BzwbQlXvxF4WlUHgaPOTePXOu0m2mZcqCq3PrqNo219
ZAe83HlFOZuuXRzvX2NM3Hk9QkG2n4LsqRltFBwJ0Xl2mM7+Idr7hukZGGZ4RAmGQvi9HrL8XjL9XrL8Xvw+IeiER99gkM6zw3T1D9HZP0zn2WGGR0IEQ8rIiIYfQyGGo54HQ0pwRBkJKYPBEUZCyrDzfDgUYsR5fXRbwyOhc+3fR877NBwJGg6HkIa/B5T3h0ciEwmHgccTDp6ah24l0++N6++IJQDmApHj5BqBK8dro6pBEekCipz1b0e9d66zPNE2ARCRTcAm52mviByMoeZx7Qf+cfyXi4G2i9l+ErB9TB3psJ/psI8Qw35mffWitr9grJWxBMBYxyHROTpem/HWj9VpOGY2q+rjwOPnKzBeRKRaVddMx+9yi+1j6kiH/UyHfQT39jOWszeNwLyI5+VA03htRMQHFADt53lvLNs0xhgzhWIJgB1ApYgsFJEA4ZO6m6PabAbuc5bvBF5RVXXW3y0iGSKyEKgE3olxm8YYY6bQhF1ATp/+A8BWwkM2/1VVa0XkYaBaVTcDTwBPOSd52wl/oeO0e5bwyd0g8BlVHQEYa5vx371Jm5auJpfZPqaOdNjPdNhHcGk/RZPptLgxxpi4sSs4jDEmTVkAGGNMmrIAIDwthYgcFJF6EXnQ7XriQUTmicirIrJfRGpF5M+d9TNF5Jcicsh5TIkbIoiIV0R2isjPnecLRWS7s5/POIMNkpaIFIrIcyJywPlM16fiZykif+H897pXRP5NRDKT/bMUkX8VkRYR2RuxbszPTsK+5XwX7RaRy6eytrQPgIipLm4HVgD3OFNYJLsg8D9U9RJgHfAZZ78eBF5W1UrgZed5Kvhzwtf5jXoEeNTZzw7C05Uks28CL6rqcmAV4X1Nqc9SROYCfwasUdWVhAeIjE4tk8yf5Q+ADVHrxvvsbic8WrKS8AWw35nKwtI+AIiY6kJVh4DRaSmSmqqeUtX3nOUewl8Ycwnv25NOsyeBj7tTYfyISDnwEeB7znMBbiQ8LQkk+X6KSD5wLeHRdqjqkKp2koKfJeGRiVnO9UTZwCmS/LNU1W2ER0dGGu+z2wj8UMPeBgpFZMqmLbYAGHuqi7njtE1KIlIBXAZsB0pV9RSEQwJIhbvifAP4AhBynhcBnao6OttZsn+mi4BW4PtON9f3RCSHFPssVfUk8A/ACcJf/F3Au6TWZzlqvM9uWr+PLABim+oiaYlILvAT4LOq2u12PfEmIh8FWlT13cjVYzRN5s/UB1wOfEdVLwP6SPLunrE4/eAbgYWEZw/OIdwlEi2ZP8uJTOt/uxYAKTwthYj4CX/5/0hVf+qsbh49pHQeW9yqL06uBu4QkWOEu+9uJHxEUOh0I0Dyf6aNQKOqbneeP0c4EFLts7wZOKqqrao6DPwUuIrU+ixHjffZTev3kQVAik5L4fSDPwHsV9WvR7wUOW3HfcDz011bPKnqF1W1XFUrCH92r6jqvcCrhKclgSTfT1U9DTSIyDJn1U2Er65Pqc+ScNfPOhHJdv77Hd3PlPksI4z32W0GPuWMBloHdI12FU0JVU37H+DDQB1wGPhrt+uJ0z59iPCh426gxvn5MOH+8ZeBQ87jTLdrjeM+Xw/83FleRHjeqXrg34EMt+u7yH1bDVQ7n+d/ADNS8bMEvgIcAPYCTwEZyf5ZAv9G+JzGMOG/8O8f77Mj3AX0mPNdtIfwiKgpq82mgjDGmDRlXUDGGJOmLACMMSZNWQAYY0yasgAwxpg0ZQFgjDFpKpabwhtjxiEiXwZ6gTbgJVVtctZ/D/i6qu5zsTxjzssCwJj4+H3CY9ebAFT1v7tajTExsC4gYyZJRP7auX/Er4DRq3PXAD8SkRoRyRKR10RkjYtlGjMhOwIwZhJE5ArCU05cRvj/n/cIz1hZDXxeVauddq7VaEysLACMmZxrgJ+paj+AiCT9vFEmfVkXkDGTZ/OnmJRgAWDM5GwDPuH08+cBH3PW9wB57pVlzORZF5Axk6Cq74nIM4RnVz0O
vOG89APgn0XkLLDepfKMmRSbDdQYY9KUdQEZY0yasgAwxpg0ZQFgjDFpygLAGGPSlAWAMcakKQsAY4xJUxYAxhiTpv4/blnJxvEVQ6AAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 去掉高于100的部分再次观察\n",
    "norDf = data[data['dti'] <= 100.0]\n",
    "sns.distplot(norDf['dti'],kde=True,hist=False)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以观察到dti的分布近似于正态分布，根据实际情况分析：大部分贷款人dti小于40（即比例为40%），还款压力较小；而dti高的贷款人必然存在较大的还款压力，也因此有坏账的风险，这部分值也可以视为异常值，不过这部分异常值对模型预测是有意义的，因此予以保留。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 227,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "大于 51.243700 的可以视为异常值\n",
      "小于 -13.951381 的可以视为异常值\n"
     ]
    }
   ],
   "source": [
    "# 根据3σ原则计算异常值区间\n",
    "dti_mean = norDf['dti'].mean()    # 计算均值\n",
    "dti_std = norDf['dti'].std()    # 计算方差\n",
    "print(\"大于 %f 的可以视为异常值\" % (dti_mean + 3*dti_std))\n",
    "print(\"小于 %f 的可以视为异常值\" % (dti_mean - 3*dti_std))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "将大于100的那部分dti值进行异常值处理，采用去掉这部分异常值的dti均值进行替代。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 228,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "此时dti特征列还有0个大于100的特征值\n"
     ]
    }
   ],
   "source": [
    "data.loc[data['dti'] > 100, 'dti'] = dti_mean    # 均值替代\n",
    "print(\"此时dti特征列还有%d个大于100的特征值\" % (data[data['dti'] > 100]).shape[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3、异常值处理3--箱型图 & 视为缺失值"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "以annual_inc为例，它表示贷款人的年收入。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 229,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABHcAAAEFCAYAAABpfQt2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAWoUlEQVR4nO3de5Ce1X0f8O/Zq25IQoiLDbYEGIxBku2aNnHtOk4cexImY6YZQsBJHVomGQ/gttRDW0omZty4nlrjYVxj13GC65AgCNaYwmRokxnbHbvYzlQURwJcx1xKAkm5GIRsdEPy6R+7+7K72surRbv7ntXnM7OjZ8/zPOf5nec5Z1b66n3fLbXWAAAAANCmvsUuAAAAAIC5E+4AAAAANEy4AwAAANAw4Q4AAABAw4Q7AAAAAA0bmI9O169fXzdu3DgfXQMAAAAcl+6///7naq0nT26fl3Bn48aN2bFjx3x0DQAAAHBcKqU8MVW7t2UBAAAANEy4AwAAANAw4Q4AAABAw4Q7AAAAAA0T7gAAAAA0TLgDAAAA0DDhDgAAAEDDhDsAAAAADRPuAAAAADRMuAMAAADQMOEOAAAAQMOEOwAAAAANE+4AAAAANEy4AwAAANAw4Q4AAABAw4Q7AAAAAA0T7gAAAAA0TLgDAAAA0DDhDgAAAEDDhDsAAAAADRPuAAAAADRMuAMAAADQMOEOAAAAQMOEOwAAAAANE+4AAAAANEy4AwAAANAw4Q4AAABAw4Q7AAAAAA0T7gAAAAA0TLgDAAAA0DDhDgAAAEDDhDsAAAAADRPuAAAAADRMuAMAAADQMOEOAAAAQMOEOwAAAAANE+4AAAAANEy4AwAAANAw4Q4AAABAw4Q7AAAAAA0T7gAAAAA0TLgDAAAA0DDhDgAAAEDDhDsAAAAADRPuAAAAADRMuAMAAADQMOEOAAAAQMOEOwAAAAANE+4AAAAANEy4AwAAANAw4Q4AAABAw4Q7AAAAAA0T7gAAAAA0TLgDAAAA0DDhDgAAAEDDhDsAAAAADRPuAAAAADRMuAMAAADQMOEOAAAAQMOEOwAAAAANE+4AAAAANEy4AwAAANAw4Q4AAABAw4Q7AAAAAA0T7gAAAAA0TLgDAAAA0DDhDgAAAEDDhDsAAAAADRPuAAAAADRMuAMAAADQMOEOAAAAQMOEOwAAAAANE+4AAAAANEy4AwAAANAw4Q4AAABAw4Q7AAAAAA0T7gAAAAA0TLgDAAAA0DDhDgAAAEDDhDsAAAAADRPuAAAAADRMuAMAAADQMOEOAAAAQMOEOwAAAAANE+4AAAAANEy4AwAAANAw4Q4AAABAw4Q7AAAAAA0T7gAAAAA0TLgDAAAA0DDhDgAAAEDDhDtHad26dSmlJDeuSSklpZSsW7duscsCAAAAjlPCnaP0wgsvpNaaJKm1ptaaF154YZGrAgAAAI5Xwh0AAACAhgl3AAAAABom3AEAAABomHAHAAAAoGHCHQAAAICGCXcAAAAAGibcmUEppaf6AQAAAJhMuAMAAADQMOEOAAAAQMOEOwAAAAANE+4AAAAANGzWcKeU8sVSyjOllAcXoqClpPT1ZWDZiiQlfYPDGRhenlJG2krpy8Dw8vQNDk9qWzGxra8vpX8wfQPjzh9eMenPKfoZPXfC96PHTu6jb2Bo3PbwK30ODI302z84oY/SN3hEv6Wvf2I9A0NTXn94xQmvXKNz7sCkukeuN3LPVhw5rs5xA6PbK7Nq1eqcfe55KX196R9cNm6cy9M3uCylry9DK1Z1xrnmxJNy6mtOH61xZaefzp99/Z0+hpavSl9ff84+97y8933vy7KVq1NKX5atXJ2rr74mt23b1rn2+GsMDo88g1Nfc3pOO/116esf6eO2bduSpHPe5PZuzHbu+JoGR8c1Vu/RmulaV199Ted+jL9/mzdvPurrtODVPLPF0G2945/jXOdJr2jtGc23
+bofc+3X8+kNS/k5dDO2pTx+5pe5w1wt1twxZ4+vezDQxTFfSnJzklvnt5SlpfT1ZWjl2my55Lrs3/NcHvnqH2XTP742azecn91PPJyH7v50Ttv87vzdX34tb3jPP8my1eunbNu5fWsOvrQnwyes6Zz/6Ndvz1P/+8+z5ZLrOv09eNdNeXn/3rz+p34pZ//s5VMes3P71gyvPikH9jyXLZf86wnnjl1v8vbhgwfyk58czmu2/ExOu+CdeeD2j6d/YDBrXndeXnzy+9lyyXX53r2/l5f3vnhEny/v35t1Z27Oj59+PBdc/C86+3Z95VP5yeFDefOv/Jv8v4f+Z5753rePqHPVKRuy94dPzTjmsePOetel2bl9a/7mqafTP7QiA8PLsvmXPzLheocO7M+bf/WGCfX96LkfZmjlmqw545WxTL5Xh/b9qFP7o1+/Pd/49p9ny6XXd477g1u35g/+y5dy+t//pSx75vkJz/jBu25KLSUv7j2Qzb/8kWwebb/m2uvyrfu+lW3b78obLromZ33glfYk+bUPfGDGuXXbtm255trrpj13bP/gutd35uBYTbfcujVJ8tnP3tzVPJ7pWt+671u55dZtWf36C6a8f5s3b86uXbvmuoR6zmz3vdd0W+/VV1+TW27dls3j5vXRzpNe0dozmm/zdT/m2q/n0xuW8nPoZmxLefzML3OHuVqsuWPOHn/3oNRaZz+olI1J/rTWuqmbTi+88MK6Y8eOV1dZDyilZPL96bTduCa58cVpjxtYtjJvvfy3s+6sLbnv5qvypos+lHVnbensf/6xnfnevZ/Pmy76UL537+fzjms+N23bA7f/bqevJNP299Ddn0nf4GDecc3npj1mcl/jaxlfw9j2Q3d/Jkny8v4f5eeuvyPfvOk3c8HFH853/+Q/5C2/+u+y7qwt+erHL52yz4fu/kxe3v+jznGT9/2ja38/X/vEZVPu73bMD9z+u3nPDXd2todWrMkFF3942uuNb/vuHR/PWy67YcJY5lLDd+/4eIZXnzTtvrdcdsMR7bvu/EQ2X3r9Ee3P3/eHefSv/k9mcva552XdO35j2nPH9k83rl13fiL7X9oz4zW6udZTT/1tNl96/Yz379D+l7q6Tgtmu++9ptt6l61cPeVcPJp50itae0bzbb7ux1z79Xx6w1J+Dt2MbSmPn/ll7jBXizV3zNmlew9KKffXWi+c3H7MPnOnlPJbpZQdpZQdzz777LHqdtGVUiZ8dXvc4QP7snbD+UmSl559srM9Zu2G8zvtLz375Ixt4/uaqb99u5/unDfdMZP7Gn/dqbb37X46+3Y/nUP7Rv6Rvm/301m74fwc2vdSp5/p+hw7b7p9Sabd3+2YDx/YN2F7rL7prje+7dD+vUeMZS41HNq/d8Z9U7Uf2PvjKdsff/QHmc3jj/5gxnPH9k83rgN7fzzrNbq51tgYZrp/S8ls973XdFvvdHPxaOZJr2jtGc23+bofc+3X8+kNS/k5dDO2pTx+5pe5w1wt1twxZ4+/e3DMwp1a6xdqrRfWWi88+eSTj1W3i67WOuGr2+P6h5dn9xMPJ0lWnnxGZ3vM7ice7rSvPPmMGdvG9zVTf8vXnto5b7pjJvc1/rpTbS9fe2qWrz01A8tXJkmWrz01u594OAPLV3b6ma7PsfOm25dk2v3djrl/ePmE7bH6prve+LaBZSuOGMtcahhYtmLGfVO1D69YNWX7mWefk9mcefY5M547tn+6cQ2vWDXrNbq51tgYZrp/S8ls973XdFvvdHPxaOZJr2jtGc23+bofc+3X8+kNS/k5dDO2pTx+5pe5w1wt1twxZ4+/e+C3Zc2Twwf3Zef2rXn+sZ05852X5MG7bsrzj+3MTw4fGn2b0Kdzynlvz4N33ZQz33nJtG07t2/N4YMHJpx/ynlv7/Q91t+Dd92Ug3v35JTz3j7tMTu3b82Kk16bnds/ecS5Y9ebvH1o/0s5uHdPTtv0rjz/2M4c3LsnO7/8
yZy4YVOn/2VrTp6yz4N79+TEDZvy0N2fnrBv11c+lUMH9+b5x3bmtE3vmrLONaefO+uYx44b2+7rH8rBvXuy6yufOuJ6B/fuOaK+w4cOZef2iWOZfK/G1z5dDX0lnec2+Rq11iPqeeTem3PlFR/MI/fefET7x278nVnn1sdu/J0Zzx3bP9W4dm3fmiuv+GDX83ima115xQeza/vWae/fm845a87rpxfNdt97Tbf1jj3HVzNPekVrz2i+zdf9mGu/nk9vWMrPoZuxLeXxM7/MHeZqseaOOXsc3oPJrziZ6ivJxiQPdnNsrTVve9vb6lIwcnumafvo6hmPq7XWlFL7h5fXpNQyMFT7h5bV5JW2/qFltQwMTWpbPrGtlJq+gVr6x50/tHzSn1P0M3ruhO9Hj53cR+kfHLc99Eqf/YMj/fYNTOgjZeCIflP6JtbTPzjl9YeWr3rlGp1z+yfVPTDuni0/clyd4/pHt1fUlStPqGed88aaUmrfwPC4cS6rZWC4ppQ6uHxlZ5yr166rp5z22tEaV3T66fxZ+jp9DC5bWUvpq2ed88b68+99bx1ecUJNSh1ecUK96qqr6x/fdlvn2uOvMTA08gxOOe219dTXnlFL30gff3zbbbXW2jlvcns3Zjt3fE0Do+Maq/dozXStq666unM/xt+/TZs2HfV1WvBqntli6Lbe8c9xrvOkV7T2jObbfN2Pufbr+fSGpfwcuhnbUh4/88vcYa4Wa+6Ys0vzHiTZUafIYWb9QOVSyu1J3p1kfZKnk3y01nrLTOf4QOXZ+wEAAAA4GtN9oPKsvwq91nr5/JQEAAAAwKvlM3cAAAAAGibcAQAAAGiYcAcAAACgYcKdGRyrD0H2YcoAAADAfBHuAAAAADRMuAMAAADQMOEOAAAAQMOEOwAAAAANE+4AAAAANEy4AwAAANAw4c4clFI6f5ZScuKJJy5yRQAAAMDxamCxC2hNrfWV7RsXrw4AAACAxCt3AAAAAJom3AEAAABomHAHAAAAoGHCHQAAAICGCXcAAAAAGibcAQAAAGiYcAcAAACgYcIdAAAAgIYJdwAAAAAaJtwBAAAAaJhwBwAAAKBhwh0AAACAhgl3AAAAABom3AEAAABomHAHAAAAoGHCHQAAAICGCXcAAAAAGibcAQAAAGiYcAcAAACgYcIdAAAAgIYJdwAAAAAaJtwBAAAAaJhwBwAAAKBhwh0AAACAhgl3AAAAABom3AEAAABomHAHAAAAoGHCHQAAAICGCXcAAAAAGibcAQAAAGiYcAcAAACgYcIdAAAAgIYJdwAAAAAaJtwBAAAAaJhwBwAAAKBhwh0AAACAhgl3AAAAABom3AEAAABomHAHAAAAoGHCHQAAAICGCXcAAAAAGibcAQAAAGiYcAcAAACgYcIdAAAAgIYJdwAAAAAaJtwBAAAAaJhwBwAAAKBhwh0AAACAhgl3AAAAABom3AEAAABomHAHAAAAoGHCHQAAAICGCXcAAAAAGibcAQAAAGiYcAcAAACgYcIdAAAAgIYJdwAAAAAaJtwBAAAAaJhwBwAAAKBhwh0AAACAhgl3AAAAABom3AEAAABomHAHAAAAoGHCHQAAAICGCXcAAAAAGibcAQAAAGiYcAcAAACgYcIdAAAAgIYJdwAAAAAaJtwBAAAAaJhwBwAAAKBhwh0AAACAhgl3AAAAABom3AEAAABomHAHAAAAoGHCHQAAAICGCXcAAAAAGibcAQAAAGiYcAcAAACgYcIdAAAAgIYJdwAAAAAaJtwBAAAAaJhwBwAAAKBhwh0AAACAhgl3AAAAABom3AEAAABomHAHAAAAoGHCHQAAAICGCXcAAAAAGibcAQAAAGiYcAcAAACgYaXWeuw7LeXZJE8c844X3vokzy12EdAQawa6Z71A96wXODrWDHSvtfWyodZ68uTGeQl3lopSyo5a64WLXQe0wpqB7lkv0D3rBY6ONQPdWyrrxduyAAAAABom3AEAAABomHBn
Zl9Y7AKgMdYMdM96ge5ZL3B0rBno3pJYLz5zBwAAAKBhXrkDAAAA0DDhDgAAAEDDhDtJSim/UEr5finlkVLKv51i/3Ap5U9G9/9FKWXjwlcJvaGL9fKvSikPl1J2llK+WkrZsBh1Qq+Ybc2MO+6SUkotpTT/qzhhrrpZL6WUS0d/zjxUStm20DVCr+ji72SvL6V8vZTywOjfyy5ajDqhF5RSvlhKeaaU8uA0+0sp5T+NrqedpZS/t9A1vlrHfbhTSulP8tkkv5jk/CSXl1LOn3TYlUleqLW+IclNSf7jwlYJvaHL9fJAkgtrrVuSbE/yyYWtEnpHl2smpZQTkvzzJH+xsBVC7+hmvZRSzklyfZJ31FovSPIvF7xQ6AFd/nz57SR31lrfmuSyJJ9b2Cqhp3wpyS/MsP8Xk5wz+vVbSf7zAtR0TB334U6Sf5DkkVrrY7XWg0nuSHLxpGMuTvKHo9vbk7ynlFIWsEboFbOul1rr12ute0e//U6SMxa4Rugl3fyMSZJ/n5EgdP9CFgc9ppv18ptJPltrfSFJaq3PLHCN0Cu6WS81yerR7TVJ/nYB64OeUmv9RpLnZzjk4iS31hHfSbK2lPKahanu2BDuJKcn+Ztx3z852jblMbXWQ0leTHLSglQHvaWb9TLelUn+27xWBL1t1jVTSnlrktfVWv90IQuDHtTNz5hzk5xbSrmvlPKdUspM/wsLS1k36+XGJL9eSnkyyb1JPrwwpUGTjvbfOT1nYLEL6AFTvQJn8u+H7+YYOB50vRZKKb+e5MIkPzOvFUFvm3HNlFL6MvJ23ysWqiDoYd38jBnIyEvm352RV4Z+s5Syqda6e55rg17TzXq5PMmXaq2fKqW8Pckfja6Xn8x/edCc5v/N75U7I4nc68Z9f0aOfMli55hSykBGXtY400u6YKnqZr2klPLzSW5I8v5a64EFqg160Wxr5oQkm5L8j1LK/03y00nu8aHKHKe6/TvZ3bXWl2utjyf5fkbCHjjedLNerkxyZ5LUWr+dZFmS9QtSHbSnq3/n9DLhTvK/kpxTSjmzlDKUkQ8bu2fSMfck+Y3R7UuSfK3W2lSKB8fIrOtl9C0mv5eRYMdnIXC8m3HN1FpfrLWur7VurLVuzMjnVL2/1rpjccqFRdXN38n+a5KfTZJSyvqMvE3rsQWtEnpDN+vlr5O8J0lKKW/KSLjz7IJWCe24J8kHR39r1k8nebHW+neLXdTROO7fllVrPVRKuSbJnyXpT/LFWutDpZSPJdlRa70nyS0ZeRnjIxl5xc5li1cxLJ4u18vWJKuSfHn0c8f/utb6/kUrGhZRl2sGSNfr5c+SvK+U8nCSw0muq7X+cPGqhsXR5Xr5SJLfL6Vcm5G3l1zhP6g5XpVSbs/IW3rXj34O1UeTDCZJrfXzGflcqouSPJJkb5J/ujiVzl2xvgEAAADa5W1ZAAAAAA0T7gAAAAA0TLgDAAAA0DDhDgAAAEDDhDsAAAAA86iU8sVSyjOllAe7OPamUsp3R7/+qpSye9Zz/LYsAAAAgPlTSnlXkh8nubXWuukozvtwkrfWWv/ZTMd55Q4AAADAPKq1fiPJ8+PbSilnl1L+eynl/lLKN0sp501x6uVJbp+t/4FjVCcAAAAA3ftCkg/VWn9QSvmpJJ9L8nNjO0spG5KcmeRrs3Uk3AEAAABYQKWUVUn+YZIvl1LGmocnHXZZku211sOz9SfcAQAAAFhYfUl211rfMsMxlyW5utvOAAAAAFggtdY9SR4vpfxKkpQRbx7bX0p5Y5ITk3y7m/6EOwAAAADzqJRye0aCmjeWUp4spVyZ5NeSXFlK+cskDyW5eNwplye5o3b5K879KnQAAACAhnnlDgAAAEDDhDsAAAAADRPuAAAAADRMuAMAAADQMOEOAAAAQMOEOwAAAAANE+4AAAAANOz/A7RiMjQ+aG9tAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1440x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Horizontal box plot of annual_inc; outliers are drawn as steel-blue circles.\n",
    "bp_list = data['annual_inc'].tolist()\n",
    "flier_style = dict(marker=\"o\", markerfacecolor=\"steelblue\")\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(20, 4))  # wide, short canvas suits a single horizontal box\n",
    "ax.boxplot(bp_list, vert=False, flierprops=flier_style)\n",
    "plt.show()  # render the box plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 230,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    1.304610e+05\n",
       "mean     7.866600e+04\n",
       "std      8.751727e+04\n",
       "min      0.000000e+00\n",
       "25%      4.500000e+04\n",
       "50%      6.500000e+04\n",
       "75%      9.500000e+04\n",
       "max      9.930475e+06\n",
       "Name: annual_inc, dtype: float64"
      ]
     },
     "execution_count": 230,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics (count, mean, std, min, quartiles, max) of the annual_inc column\n",
    "data.loc[:, 'annual_inc'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 231,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "箱型图上须： 170000.0\n",
      "箱型图下须： -30000.0\n"
     ]
    }
   ],
   "source": [
    "# 箱型图的边界\n",
    "q1 = data['annual_inc'].describe()['25%']\n",
    "q3 = data['annual_inc'].describe()['75%']\n",
    "iqr = q3 - q1\n",
    "print(\"箱型图上须：\", q3 + 1.5*iqr)\n",
    "print(\"箱型图下须：\", q1 - 1.5*iqr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "由上述可见，绝大多数贷款人的年收入都远低于150万（75%分位数为9.5万，箱型图上须为17万），这与Lending Club以中小额贷款为主的业务也较为相符。因此，可以对箱型图中大于150万的极端异常值进行一定的处理：这里将其视为缺失值，采用固定值进行替代，统一定为150万。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 232,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "此时annual_inc特征列还有0个大于150万的特征值\n"
     ]
    }
   ],
   "source": [
    "# Cap extreme incomes: every value above the ceiling is overwritten with the ceiling itself.\n",
    "income_cap = 1500000.0\n",
    "over_cap = data['annual_inc'] > income_cap\n",
    "data.loc[over_cap, 'annual_inc'] = income_cap\n",
    "\n",
    "# Re-count after the replacement; the expected result is 0.\n",
    "n_still_over = (data['annual_inc'] > income_cap).sum()\n",
    "print(\"此时annual_inc特征列还有%d个大于150万的特征值\" % n_still_over)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "总结：\n",
    "本章节主要介绍了数据预处理中常见的两种问题——缺失值和异常值。我们利用Lending Club贷款数据集进行了两种问题的分析，并对两种问题提出了相对应的解决方法。其中对于缺失值，我们主要介绍了采用直接删除，数据填补和不处理三种方法进行处理。而异常值的处理我们主要介绍了：直接删除异常样本，用均值替代异常样本，视为缺失值这三种解决方法。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
